qemu/hw/i386/pc.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1845 lines
60 KiB
C
Raw Normal View History

/*
* QEMU PC System Emulator
*
* Copyright (c) 2003-2004 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "hw/i386/pc.h"
#include "hw/char/serial.h"
#include "hw/char/parallel.h"
#include "hw/hyperv/hv-balloon.h"
#include "hw/i386/fw_cfg.h"
#include "hw/i386/vmport.h"
#include "sysemu/cpus.h"
#include "hw/ide/ide-bus.h"
#include "hw/timer/hpet.h"
#include "hw/loader.h"
#include "hw/rtc/mc146818rtc.h"
#include "hw/intc/i8259.h"
#include "hw/timer/i8254.h"
#include "hw/input/i8042.h"
#include "hw/audio/pcspk.h"
#include "sysemu/sysemu.h"
#include "sysemu/xen.h"
#include "sysemu/reset.h"
#include "kvm/kvm_i386.h"
#include "hw/xen/xen.h"
#include "qapi/qmp/qlist.h"
#include "qemu/error-report.h"
#include "hw/acpi/cpu_hotplug.h"
i386: ACPI table generation code from seabios This adds C code for generating ACPI tables at runtime, imported from seabios git tree commit 51684b7ced75fb76776e8ee84833fcfb6ecf12dd Although ACPI tables come from a system BIOS on real hw, it makes sense that the ACPI tables are coupled with the virtual machine, since they have to abstract the x86 machine to the OS's. This is widely desired as a way to avoid the churn and proliferation of QEMU-specific interfaces associated with ACPI tables in bios code. Notes: As BIOS can reprogram devices prior to loading ACPI tables, we pre-format ACPI tables but defer loading hardware configuration there until tables are loaded. The code structure was intentionally kept as close to the seabios original as possible, to simplify comparison and making sure we didn't lose anything in translation. Minor code duplication results, to help ensure there are no functional regressions, I think it's better to merge it like this and do more code changes in follow-up patches. Cross-version compatibility concerns have been addressed: ACPI tables are exposed to guest as FW_CFG entries. When running with -M 1.5 and older, this patch disables ACPI table generation, and doesn't expose ACPI tables to guest. As table content is likely to change over time, the following measures are taken to simplify cross-version migration: - All tables besides the RSDP are packed in a single FW CFG entry. This entry size is currently 23K. We round it up to 64K to avoid too much churn there. - Tables are placed in special ROM blob (not mapped into guest memory) which is automatically migrated together with the guest, same as BIOS code. - Offsets where hardware configuration is loaded in ACPI tables are also migrated, this is in case future ACPI changes make us rearrange the tables in memory. This patch reuses some code from SeaBIOS, which was originally under LGPLv2 and then relicensed to GPLv3 or LGPLv3, in QEMU under GPLv2+. This relicensing has been acked by all contributors that had contributed to the code since the v2->v3 relicense. ACKs approving the v2+ relicensing are listed below. The list might include ACKs from people not holding copyright on any parts of the reused code, but it's better to err on the side of caution and include them. Affected SeaBIOS files (GPLv2+ license headers added) <http://thread.gmane.org/gmane.comp.bios.coreboot.seabios/5949>: src/acpi-dsdt-cpu-hotplug.dsl src/acpi-dsdt-dbug.dsl src/acpi-dsdt-hpet.dsl src/acpi-dsdt-isa.dsl src/acpi-dsdt-pci-crs.dsl src/acpi.c src/acpi.h src/ssdt-misc.dsl src/ssdt-pcihp.dsl src/ssdt-proc.dsl tools/acpi_extract.py tools/acpi_extract_preprocess.py Each one of the listed people agreed to the following: > If you allow the use of your contribution in QEMU under the > terms of GPLv2 or later as proposed by this patch, > please respond to this mail including the line: > > Acked-by: Name <email address> Acked-by: Gerd Hoffmann <kraxel@redhat.com> Acked-by: Jan Kiszka <jan.kiszka@siemens.com> Acked-by: Jason Baron <jbaron@akamai.com> Acked-by: David Woodhouse <David.Woodhouse@intel.com> Acked-by: Gleb Natapov <gleb@redhat.com> Acked-by: Marcelo Tosatti <mtosatti@redhat.com> Acked-by: Dave Frodin <dave.frodin@se-eng.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Acked-by: Kevin O'Connor <kevin@koconnor.net> Acked-by: Laszlo Ersek <lersek@redhat.com> Acked-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Acked-by: Isaku Yamahata <yamahata@valinux.co.jp> Acked-by: Magnus Christensson <magnus.christensson@intel.com> Acked-by: Hu Tao <hutao@cn.fujitsu.com> Acked-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: Gerd Hoffmann <kraxel@redhat.com> Tested-by: Gerd Hoffmann <kraxel@redhat.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Tested-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-07-24 19:56:14 +04:00
#include "acpi-build.h"
hw/i386: Include "hw/mem/nvdimm.h" All this files use methods/definitions declared in the NVDIMM device header. Include it. This fixes (when modifying unrelated headers): hw/i386/acpi-build.c:2733:9: error: implicit declaration of function 'nvdimm_build_acpi' is invalid in C99 [-Werror,-Wimplicit-function-declaration] nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, ^ hw/i386/pc.c:1996:61: error: use of undeclared identifier 'TYPE_NVDIMM' const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); ^ hw/i386/pc.c:2032:55: error: use of undeclared identifier 'TYPE_NVDIMM' bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); ^ hw/i386/pc.c:2040:9: error: implicit declaration of function 'nvdimm_plug' is invalid in C99 [-Werror,-Wimplicit-function-declaration] nvdimm_plug(ms->nvdimms_state); ^ hw/i386/pc.c:2040:9: error: this function declaration is not a prototype [-Werror,-Wstrict-prototypes] nvdimm_plug(ms->nvdimms_state); ^ hw/i386/pc.c:2065:42: error: use of undeclared identifier 'TYPE_NVDIMM' if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { ^ hw/i386/pc_i440fx.c:307:9: error: implicit declaration of function 'nvdimm_init_acpi_state' is invalid in C99 [-Werror,-Wimplicit-function-declaration] nvdimm_init_acpi_state(machine->nvdimms_state, system_io, ^ hw/i386/pc_q35.c:332:9: error: implicit declaration of function 'nvdimm_init_acpi_state' is invalid in C99 [-Werror,-Wimplicit-function-declaration] nvdimm_init_acpi_state(machine->nvdimms_state, system_io, ^ Acked-by: John Snow <jsnow@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20200228114649.12818-17-philmd@redhat.com> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
2020-02-28 14:46:47 +03:00
#include "hw/mem/nvdimm.h"
#include "hw/cxl/cxl_host.h"
#include "hw/usb.h"
#include "hw/i386/intel_iommu.h"
#include "hw/net/ne2000-isa.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/virtio/virtio-md-pci.h"
#include "hw/i386/kvm/xen_overlay.h"
#include "hw/i386/kvm/xen_evtchn.h"
#include "hw/i386/kvm/xen_gnttab.h"
#include "hw/i386/kvm/xen_xenstore.h"
#include "hw/mem/memory-device.h"
#include "e820_memory_layout.h"
#include "trace.h"
#include "sev.h"
#include CONFIG_DEVICES
#ifdef CONFIG_XEN_EMU
#include "hw/xen/xen-legacy-backend.h"
#include "hw/xen/xen-bus.h"
#endif
/*
* Helper for setting model-id for CPU models that changed model-id
* depending on QEMU versions up to QEMU 2.4.
*/
#define PC_CPU_MODEL_IDS(v) \
{ "qemu32-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
{ "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
{ "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
GlobalProperty pc_compat_9_0[] = {
i386/cpu: Fix i/d-cache topology to core level for Intel CPU For i-cache and d-cache, current QEMU hardcodes the maximum IDs for CPUs sharing cache (CPUID.04H.00H:EAX[bits 25:14] and CPUID.04H.01H:EAX[bits 25:14]) to 0, and this means i-cache and d-cache are shared in the SMT level. This is correct if there's single thread per core, but is wrong for the hyper threading case (one core contains multiple threads) since the i-cache and d-cache are shared in the core level other than SMT level. For AMD CPU, commit 8f4202fb1080 ("i386: Populate AMD Processor Cache Information for cpuid 0x8000001D") has already introduced i/d cache topology as core level by default. Therefore, in order to be compatible with both multi-threaded and single-threaded situations, we should set i-cache and d-cache be shared at the core level by default. This fix changes the default i/d cache topology from per-thread to per-core. Potentially, this change in L1 cache topology may affect the performance of the VM if the user does not specifically specify the topology or bind the vCPU. However, the way to achieve optimal performance should be to create a reasonable topology and set the appropriate vCPU affinity without relying on QEMU's default topology structure. Fixes: 7e3482f82480 ("i386: Helpers to encode cache information consistently") Suggested-by: Robert Hoo <robert.hu@linux.intel.com> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com> Tested-by: Babu Moger <babu.moger@amd.com> Tested-by: Yongwei Ma <yongwei.ma@intel.com> Acked-by: Michael S. Tsirkin <mst@redhat.com> Message-ID: <20240424154929.1487382-6-zhao1.liu@intel.com> [Add compat property. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2024-04-24 18:49:13 +03:00
{ TYPE_X86_CPU, "x-l1-cache-per-thread", "false" },
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
{ "sev-guest", "legacy-vm-type", "true" },
target/i386: Fix CPUID encoding of Fn8000001E_ECX Observed the following failure while booting the SEV-SNP guest and the guest fails to boot with the smp parameters: "-smp 192,sockets=1,dies=12,cores=8,threads=2". qemu-system-x86_64: sev_snp_launch_update: SNP_LAUNCH_UPDATE ret=-5 fw_error=22 'Invalid parameter' qemu-system-x86_64: SEV-SNP: CPUID validation failed for function 0x8000001e, index: 0x0. provided: eax:0x00000000, ebx: 0x00000100, ecx: 0x00000b00, edx: 0x00000000 expected: eax:0x00000000, ebx: 0x00000100, ecx: 0x00000300, edx: 0x00000000 qemu-system-x86_64: SEV-SNP: failed update CPUID page Reason for the failure is due to overflowing of bits used for "Node per processor" in CPUID Fn8000001E_ECX. This field's width is 3 bits wide and can hold maximum value 0x7. With dies=12 (0xB), it overflows and spills over into the reserved bits. In the case of SEV-SNP, this causes CPUID enforcement failure and guest fails to boot. The PPR documentation for CPUID_Fn8000001E_ECX [Node Identifiers] ================================================================= Bits Description 31:11 Reserved. 10:8 NodesPerProcessor: Node per processor. Read-only. ValidValues: Value Description 0h 1 node per processor. 7h-1h Reserved. 7:0 NodeId: Node ID. Read-only. Reset: Fixed,XXh. ================================================================= As in the spec, the valid value for "node per processor" is 0 and rest are reserved. Looking back at the history of decoding of CPUID_Fn8000001E_ECX, noticed that there were cases where "node per processor" can be more than 1. It is valid only for pre-F17h (pre-EPYC) architectures. For EPYC or later CPUs, the linux kernel does not use this information to build the L3 topology. Also noted that the CPUID Function 0x8000001E_ECX is available only when TOPOEXT feature is enabled. This feature is enabled only for EPYC(F17h) or later processors. So, previous generation of processors do not not enumerate 0x8000001E_ECX leaf. There could be some corner cases where the older guests could enable the TOPOEXT feature by running with -cpu host, in which case legacy guests might notice the topology change. To address those cases introduced a new CPU property "legacy-multi-node". It will be true for older machine types to maintain compatibility. By default, it will be false, so new decoding will be used going forward. The documentation is taken from Preliminary Processor Programming Reference (PPR) for AMD Family 19h Model 11h, Revision B1 Processors 55901 Rev 0.25 - Oct 6, 2022. Cc: qemu-stable@nongnu.org Fixes: 31ada106d891 ("Simplify CPUID_8000_001E for AMD") Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Signed-off-by: Babu Moger <babu.moger@amd.com> Message-ID: <0ee4b0a8293188a53970a2b0e4f4ef713425055e.1714757834.git.babu.moger@amd.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2024-05-03 20:46:30 +03:00
{ TYPE_X86_CPU, "legacy-multi-node", "on" },
};
const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0);
GlobalProperty pc_compat_8_2[] = {};
const size_t pc_compat_8_2_len = G_N_ELEMENTS(pc_compat_8_2);
GlobalProperty pc_compat_8_1[] = {};
const size_t pc_compat_8_1_len = G_N_ELEMENTS(pc_compat_8_1);
virtio-mem: Default to "unplugged-inaccessible=on" with 8.1 on x86-64 Allowing guests to read unplugged memory simplified the bring-up of virtio-mem in Linux guests -- which was limited to x86-64 only. On arm64 (which was added later), we never had legacy guests and don't even allow to configure it, essentially always having "unplugged-inaccessible=on". At this point, all guests we care about should be supporting VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, so let's change the default for the 8.1 machine. This change implies that also memory that supports the shared zeropage (private anonymous memory) will now require VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE in the driver in order to be usable by the guest -- as default, one can still manually set the unplugged-inaccessible property. Disallowing the guest to read unplugged memory will be important for some future features, such as memslot optimizations or protection of unplugged memory, whereby we'll actually no longer allow the guest to even read from unplugged memory. At some point, we might want to deprecate and remove that property. Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Richard Henderson <richard.henderson@linaro.org> Cc: Eduardo Habkost <eduardo@habkost.net> Signed-off-by: David Hildenbrand <david@redhat.com> Message-Id: <20230503182352.792458-1-david@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-03 21:23:52 +03:00
GlobalProperty pc_compat_8_0[] = {
{ "virtio-mem", "unplugged-inaccessible", "auto" },
};
const size_t pc_compat_8_0_len = G_N_ELEMENTS(pc_compat_8_0);
hw/isa: enable TCO watchdog reboot pin strap by default The TCO watchdog implementation default behaviour from POV of the guest OS relies on the initial values for two I/O ports: * TCO1_CNT == 0x0 Since bit 11 (TCO Timer Halt) is clear, the watchdog state is considered to be initially running * GCS == 0x20 Since bit 5 (No Reboot) is set, the watchdog will not trigger when the timer expires This is a safe default, because the No Reboot bit will prevent the watchdog from triggering if the guest OS is unaware of its existance, or is slow in configuring it. When a Linux guest initializes the TCO watchdog, it will attempt to clear the "No Reboot" flag, and read the value back. If the clear was honoured, the driver will treat this as an indicator that the watchdog is functional and create the guest watchdog device. QEMU implements a second "no reboot" flag, however, via pin straps which overrides the behaviour of the guest controlled "no reboot" flag: commit 5add35bec1e249bb5345a47008c8f298d4760be4 Author: Paulo Alcantara <pcacjr@gmail.com> Date: Sun Jun 28 14:58:58 2015 -0300 ich9: implement strap SPKR pin logic This second 'noreboot' pin was defaulted to high, which also inhibits triggering of the requested watchdog actions, unless QEMU is launched with the magic flag "-global ICH9-LPC.noreboot=false". This is a bad default as we are exposing a watchdog to every guest OS using the q35 machine type, but preventing it from actually doing what it is designed to do. What is worse is that the guest OS and its apps have no way to know that the watchdog is never going to fire, due to this second 'noreboot' pin. If a guest OS had no watchdog device at all, then apps whose operation and/or data integrity relies on a watchdog can refuse to launch, and alert the administrator of the problematic deployment. With Q35 machines unconditionally exposing a watchdog though, apps will think their deployment is correct but in fact have no protection at all. This patch flips the default of the second 'no reboot' flag, so that configured watchdog actions will be honoured out of the box for the 7.2 Q35 machine type onwards, if the guest enables use of the watchdog. See also related bug reports https://bugzilla.redhat.com/show_bug.cgi?id=2080207 https://bugzilla.redhat.com/show_bug.cgi?id=2136889 https://bugzilla.redhat.com/show_bug.cgi?id=2137346 Reviewed-by: Richard W.M. Jones <rjones@redhat.com> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Message-Id: <20221216125749.596075-5-berrange@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-12-16 15:57:48 +03:00
GlobalProperty pc_compat_7_2[] = {
{ "ICH9-LPC", "noreboot", "true" },
};
const size_t pc_compat_7_2_len = G_N_ELEMENTS(pc_compat_7_2);
GlobalProperty pc_compat_7_1[] = {};
const size_t pc_compat_7_1_len = G_N_ELEMENTS(pc_compat_7_1);
GlobalProperty pc_compat_7_0[] = {};
const size_t pc_compat_7_0_len = G_N_ELEMENTS(pc_compat_7_0);
GlobalProperty pc_compat_6_2[] = {
{ "virtio-mem", "unplugged-inaccessible", "off" },
};
const size_t pc_compat_6_2_len = G_N_ELEMENTS(pc_compat_6_2);
GlobalProperty pc_compat_6_1[] = {
{ TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" },
{ TYPE_X86_CPU, "hv-version-id-major", "0x0006" },
{ TYPE_X86_CPU, "hv-version-id-minor", "0x0001" },
{ "ICH9-LPC", "x-keep-pci-slot-hpc", "false" },
};
const size_t pc_compat_6_1_len = G_N_ELEMENTS(pc_compat_6_1);
GlobalProperty pc_compat_6_0[] = {
{ "qemu64" "-" TYPE_X86_CPU, "family", "6" },
{ "qemu64" "-" TYPE_X86_CPU, "model", "6" },
{ "qemu64" "-" TYPE_X86_CPU, "stepping", "3" },
{ TYPE_X86_CPU, "x-vendor-cpuid-only", "off" },
{ "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" },
{ "ICH9-LPC", "x-keep-pci-slot-hpc", "true" },
};
const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0);
GlobalProperty pc_compat_5_2[] = {
{ "ICH9-LPC", "x-smi-cpu-hotunplug", "off" },
};
const size_t pc_compat_5_2_len = G_N_ELEMENTS(pc_compat_5_2);
GlobalProperty pc_compat_5_1[] = {
{ "ICH9-LPC", "x-smi-cpu-hotplug", "off" },
{ TYPE_X86_CPU, "kvm-msi-ext-dest-id", "off" },
};
const size_t pc_compat_5_1_len = G_N_ELEMENTS(pc_compat_5_1);
hw/pci-host: save/restore pci host config register The pci host config register is used to save PCI address for read/write config data. If guest writes a value to config register, and then QEMU pauses the vcpu to migrate, after the migration, the guest will continue to write pci config data, and the write data will be ignored because of new qemu process losing the config register state. To trigger the bug: 1. guest is booting in seabios. 2. guest enables the SMRAM in seabios:piix4_apmc_smm_setup, and then expects to disable the SMRAM by pci_config_writeb. 3. after guest writes the pci host config register, QEMU pauses vcpu to finish migration. 4. guest write of config data(0x0A) fails to disable the SMRAM because the config register state is lost. 5. guest continues to boot and crashes in ipxe option ROM due to SMRAM in enabled state. Example Reproducer: step 1. Make modifications to seabios and qemu for increase reproduction efficiency, write 0xf0 to 0x402 port notify qemu to stop vcpu after 0x0cf8 port wrote i440 configure register. qemu stop vcpu when catch 0x402 port wrote 0xf0. seabios:/src/hw/pci.c @@ -52,6 +52,11 @@ void pci_config_writeb(u16 bdf, u32 addr, u8 val) writeb(mmconfig_addr(bdf, addr), val); } else { outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD); + if (bdf == 0 && addr == 0x72 && val == 0xa) { + dprintf(1, "stop vcpu\n"); + outb(0xf0, 0x402); // notify qemu to stop vcpu + dprintf(1, "resume vcpu\n"); + } outb(val, PORT_PCI_DATA + (addr & 3)); } } qemu:hw/char/debugcon.c @@ -60,6 +61,9 @@ static void debugcon_ioport_write(void *opaque, hwaddr addr, uint64_t val, printf(" [debugcon: write addr=0x%04" HWADDR_PRIx " val=0x%02" PRIx64 "]\n", addr, val); #endif + if (ch == 0xf0) { + vm_stop(RUN_STATE_PAUSED); + } /* XXX this blocks entire thread. Rewrite to use * qemu_chr_fe_write and background I/O callbacks */ qemu_chr_fe_write_all(&s->chr, &ch, 1); step 2. start vm1 by the following command line, and then vm stopped. $ qemu-system-x86_64 -machine pc-i440fx-5.0,accel=kvm\ -netdev tap,ifname=tap-test,id=hostnet0,vhost=on,downscript=no,script=no\ -device virtio-net-pci,netdev=hostnet0,id=net0,bus=pci.0,addr=0x13,bootindex=3\ -device cirrus-vga,id=video0,vgamem_mb=16,bus=pci.0,addr=0x2\ -chardev file,id=seabios,path=/var/log/test.seabios,append=on\ -device isa-debugcon,iobase=0x402,chardev=seabios\ -monitor stdio step 3. start vm2 to accept vm1 state. $ qemu-system-x86_64 -machine pc-i440fx-5.0,accel=kvm\ -netdev tap,ifname=tap-test1,id=hostnet0,vhost=on,downscript=no,script=no\ -device virtio-net-pci,netdev=hostnet0,id=net0,bus=pci.0,addr=0x13,bootindex=3\ -device cirrus-vga,id=video0,vgamem_mb=16,bus=pci.0,addr=0x2\ -chardev file,id=seabios,path=/var/log/test.seabios,append=on\ -device isa-debugcon,iobase=0x402,chardev=seabios\ -monitor stdio \ -incoming tcp:127.0.0.1:8000 step 4. execute the following qmp command in vm1 to migrate. (qemu) migrate tcp:127.0.0.1:8000 step 5. execute the following qmp command in vm2 to resume vcpu. (qemu) cont Before this patch, we get KVM "emulation failure" error on vm2. This patch fixes it. Cc: qemu-stable@nongnu.org Signed-off-by: Hogan Wang <hogan.wang@huawei.com> Message-Id: <20200727084621.3279-1-hogan.wang@huawei.com> Reported-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-07-27 11:46:20 +03:00
GlobalProperty pc_compat_5_0[] = {
};
const size_t pc_compat_5_0_len = G_N_ELEMENTS(pc_compat_5_0);
q35: implement 128K SMRAM at default SMBASE address It's not what real HW does, implementing which would be overkill [**] and would require complex cross stack changes (QEMU+firmware) to make it work. So considering that SMRAM is owned by MCH, for simplicity (ab)use reserved Q35 register, which allows QEMU and firmware easily init and make RAM at SMBASE available only from SMM context. Patch uses commit (2f295167e0 q35/mch: implement extended TSEG sizes) for inspiration and uses reserved register in config space at 0x9c offset [*] to extend q35 pci-host with ability to use 128K at 0x30000 as SMRAM and hide it (like TSEG) from non-SMM context. Usage: 1: write 0xff in the register 2: if the feature is supported, follow up read from the register should return 0x01. At this point RAM at 0x30000 is still available for SMI handler configuration from non-SMM context 3: writing 0x02 in the register, locks SMBASE area, making its contents available only from SMM context. In non-SMM context, reads return 0xff and writes are ignored. Further writes into the register are ignored until the system reset. *) https://www.mail-archive.com/qemu-devel@nongnu.org/msg455991.html **) https://www.mail-archive.com/qemu-devel@nongnu.org/msg646965.html Signed-off-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <1575896942-331151-3-git-send-email-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Tested-by: Laszlo Ersek <lersek@redhat.com>
2019-12-09 16:08:55 +03:00
GlobalProperty pc_compat_4_2[] = {
{ "mch", "smbase-smram", "off" },
};
const size_t pc_compat_4_2_len = G_N_ELEMENTS(pc_compat_4_2);
GlobalProperty pc_compat_4_1[] = {};
const size_t pc_compat_4_1_len = G_N_ELEMENTS(pc_compat_4_1);
GlobalProperty pc_compat_4_0[] = {};
const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);
GlobalProperty pc_compat_3_1[] = {
{ "intel-iommu", "dma-drain", "off" },
{ "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" },
{ "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" },
{ "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" },
{ "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" },
{ "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" },
{ "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" },
{ "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" },
{ "EPYC" "-" TYPE_X86_CPU, "npt", "off" },
{ "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" },
{ "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" },
{ "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" },
{ "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
{ "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" },
{ TYPE_X86_CPU, "x-intel-pt-auto-level", "off" },
};
const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1);
GlobalProperty pc_compat_3_0[] = {
{ TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" },
{ "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" },
{ "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" },
};
const size_t pc_compat_3_0_len = G_N_ELEMENTS(pc_compat_3_0);
GlobalProperty pc_compat_2_12[] = {
{ TYPE_X86_CPU, "legacy-cache", "on" },
{ TYPE_X86_CPU, "topoext", "off" },
{ "EPYC-" TYPE_X86_CPU, "xlevel", "0x8000000a" },
{ "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", "0x8000000a" },
};
const size_t pc_compat_2_12_len = G_N_ELEMENTS(pc_compat_2_12);
GlobalProperty pc_compat_2_11[] = {
{ TYPE_X86_CPU, "x-migrate-smi-count", "off" },
{ "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" },
};
const size_t pc_compat_2_11_len = G_N_ELEMENTS(pc_compat_2_11);
GlobalProperty pc_compat_2_10[] = {
{ TYPE_X86_CPU, "x-hv-max-vps", "0x40" },
{ "i440FX-pcihost", "x-pci-hole64-fix", "off" },
{ "q35-pcihost", "x-pci-hole64-fix", "off" },
};
const size_t pc_compat_2_10_len = G_N_ELEMENTS(pc_compat_2_10);
GlobalProperty pc_compat_2_9[] = {
{ "mch", "extended-tseg-mbytes", "0" },
};
const size_t pc_compat_2_9_len = G_N_ELEMENTS(pc_compat_2_9);
GlobalProperty pc_compat_2_8[] = {
{ TYPE_X86_CPU, "tcg-cpuid", "off" },
{ "kvmclock", "x-mach-use-reliable-get-clock", "off" },
{ "ICH9-LPC", "x-smi-broadcast", "off" },
{ TYPE_X86_CPU, "vmware-cpuid-freq", "off" },
{ "Haswell-" TYPE_X86_CPU, "stepping", "1" },
};
const size_t pc_compat_2_8_len = G_N_ELEMENTS(pc_compat_2_8);
GlobalProperty pc_compat_2_7[] = {
{ TYPE_X86_CPU, "l3-cache", "off" },
{ TYPE_X86_CPU, "full-cpuid-auto-level", "off" },
{ "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" },
{ "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" },
{ "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" },
{ "isa-pcspk", "migrate", "off" },
};
const size_t pc_compat_2_7_len = G_N_ELEMENTS(pc_compat_2_7);
GlobalProperty pc_compat_2_6[] = {
{ TYPE_X86_CPU, "cpuid-0xb", "off" },
{ "vmxnet3", "romfile", "" },
{ TYPE_X86_CPU, "fill-mtrr-mask", "off" },
{ "apic-common", "legacy-instance-id", "on", }
};
const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6);
GlobalProperty pc_compat_2_5[] = {};
const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5);
GlobalProperty pc_compat_2_4[] = {
PC_CPU_MODEL_IDS("2.4.0")
{ "Haswell-" TYPE_X86_CPU, "abm", "off" },
{ "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" },
{ "Broadwell-" TYPE_X86_CPU, "abm", "off" },
{ "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" },
{ "host" "-" TYPE_X86_CPU, "host-cache-info", "on" },
{ TYPE_X86_CPU, "check", "off" },
{ "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" },
{ "qemu64" "-" TYPE_X86_CPU, "abm", "on" },
{ "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" },
{ "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" },
{ "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" },
{ "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" },
{ "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" },
{ "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", }
};
const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4);
GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled)
{
GSIState *s;
s = g_new0(GSIState, 1);
if (kvm_ioapic_in_kernel()) {
kvm_pc_setup_irq_routing(pci_enabled);
}
*irqs = qemu_allocate_irqs(gsi_handler, s, IOAPIC_NUM_PINS);
return s;
}
static void ioport80_write(void *opaque, hwaddr addr, uint64_t data,
unsigned size)
{
}
static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size)
{
return 0xffffffffffffffffULL;
}
/* MS-DOS compatibility mode FPU exception support */
static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data,
unsigned size)
{
if (tcg_enabled()) {
cpu_set_ignne();
}
}
static uint64_t ioportF0_read(void *opaque, hwaddr addr, unsigned size)
{
return 0xffffffffffffffffULL;
}
/* PC cmos mappings */
#define REG_EQUIPMENT_BYTE 0x14
static void cmos_init_hd(MC146818RtcState *s, int type_ofs, int info_ofs,
int16_t cylinders, int8_t heads, int8_t sectors)
{
mc146818rtc_set_cmos_data(s, type_ofs, 47);
mc146818rtc_set_cmos_data(s, info_ofs, cylinders);
mc146818rtc_set_cmos_data(s, info_ofs + 1, cylinders >> 8);
mc146818rtc_set_cmos_data(s, info_ofs + 2, heads);
mc146818rtc_set_cmos_data(s, info_ofs + 3, 0xff);
mc146818rtc_set_cmos_data(s, info_ofs + 4, 0xff);
mc146818rtc_set_cmos_data(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
mc146818rtc_set_cmos_data(s, info_ofs + 6, cylinders);
mc146818rtc_set_cmos_data(s, info_ofs + 7, cylinders >> 8);
mc146818rtc_set_cmos_data(s, info_ofs + 8, sectors);
}
/* convert boot_device letter to something recognizable by the bios */
static int boot_device2nibble(char boot_device)
{
switch(boot_device) {
case 'a':
case 'b':
return 0x01; /* floppy boot */
case 'c':
return 0x02; /* hard drive boot */
case 'd':
return 0x03; /* CD-ROM boot */
case 'n':
return 0x04; /* Network boot */
}
return 0;
}
static void set_boot_dev(PCMachineState *pcms, MC146818RtcState *s,
const char *boot_device, Error **errp)
{
#define PC_MAX_BOOT_DEVICES 3
int nbds, bds[3] = { 0, };
int i;
nbds = strlen(boot_device);
if (nbds > PC_MAX_BOOT_DEVICES) {
error_setg(errp, "Too many boot devices for PC");
return;
}
for (i = 0; i < nbds; i++) {
bds[i] = boot_device2nibble(boot_device[i]);
if (bds[i] == 0) {
error_setg(errp, "Invalid boot device for PC: '%c'",
boot_device[i]);
return;
}
}
mc146818rtc_set_cmos_data(s, 0x3d, (bds[1] << 4) | bds[0]);
mc146818rtc_set_cmos_data(s, 0x38, (bds[2] << 4) | !pcms->fd_bootchk);
}
static void pc_boot_set(void *opaque, const char *boot_device, Error **errp)
{
PCMachineState *pcms = opaque;
X86MachineState *x86ms = X86_MACHINE(pcms);
set_boot_dev(pcms, MC146818_RTC(x86ms->rtc), boot_device, errp);
}
static void pc_cmos_init_floppy(MC146818RtcState *rtc_state, ISADevice *floppy)
{
int val, nb;
FloppyDriveType fd_type[2] = { FLOPPY_DRIVE_TYPE_NONE,
FLOPPY_DRIVE_TYPE_NONE };
#ifdef CONFIG_FDC_ISA
/* floppy type */
if (floppy) {
for (int i = 0; i < 2; i++) {
fd_type[i] = isa_fdc_get_drive_type(floppy, i);
}
}
#endif
val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
cmos_get_fd_drive_type(fd_type[1]);
mc146818rtc_set_cmos_data(rtc_state, 0x10, val);
val = mc146818rtc_get_cmos_data(rtc_state, REG_EQUIPMENT_BYTE);
nb = 0;
if (fd_type[0] != FLOPPY_DRIVE_TYPE_NONE) {
nb++;
}
if (fd_type[1] != FLOPPY_DRIVE_TYPE_NONE) {
nb++;
}
switch (nb) {
case 0:
break;
case 1:
val |= 0x01; /* 1 drive, ready for boot */
break;
case 2:
val |= 0x41; /* 2 drives, ready for boot */
break;
}
mc146818rtc_set_cmos_data(rtc_state, REG_EQUIPMENT_BYTE, val);
}
typedef struct check_fdc_state {
ISADevice *floppy;
bool multiple;
} CheckFdcState;
static int check_fdc(Object *obj, void *opaque)
{
CheckFdcState *state = opaque;
Object *fdc;
uint32_t iobase;
Error *local_err = NULL;
fdc = object_dynamic_cast(obj, TYPE_ISA_FDC);
if (!fdc) {
return 0;
}
iobase = object_property_get_uint(obj, "iobase", &local_err);
if (local_err || iobase != 0x3f0) {
error_free(local_err);
return 0;
}
if (state->floppy) {
state->multiple = true;
} else {
state->floppy = ISA_DEVICE(obj);
}
return 0;
}
static const char * const fdc_container_path[] = {
"/unattached", "/peripheral", "/peripheral-anon"
};
/*
* Locate the FDC at IO address 0x3f0, in order to configure the CMOS registers
* and ACPI objects.
*/
static ISADevice *pc_find_fdc0(void)
{
int i;
Object *container;
CheckFdcState state = { 0 };
for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) {
container = container_get(qdev_get_machine(), fdc_container_path[i]);
object_child_foreach(container, check_fdc, &state);
}
if (state.multiple) {
Convert error_report() to warn_report() Convert all uses of error_report("warning:"... to use warn_report() instead. This helps standardise on a single method of printing warnings to the user. All of the warnings were changed using these two commands: find ./* -type f -exec sed -i \ 's|error_report(".*warning[,:] |warn_report("|Ig' {} + Indentation fixed up manually afterwards. The test-qdev-global-props test case was manually updated to ensure that this patch passes make check (as the test cases are case sensitive). Signed-off-by: Alistair Francis <alistair.francis@xilinx.com> Suggested-by: Thomas Huth <thuth@redhat.com> Cc: Jeff Cody <jcody@redhat.com> Cc: Kevin Wolf <kwolf@redhat.com> Cc: Max Reitz <mreitz@redhat.com> Cc: Ronnie Sahlberg <ronniesahlberg@gmail.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Lieven <pl@kamp.de> Cc: Josh Durgin <jdurgin@redhat.com> Cc: "Richard W.M. Jones" <rjones@redhat.com> Cc: Markus Armbruster <armbru@redhat.com> Cc: Peter Crosthwaite <crosthwaite.peter@gmail.com> Cc: Richard Henderson <rth@twiddle.net> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: Greg Kurz <groug@kaod.org> Cc: Rob Herring <robh@kernel.org> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: Peter Chubb <peter.chubb@nicta.com.au> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Alexander Graf <agraf@suse.de> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Jason Wang <jasowang@redhat.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Cornelia Huck <cohuck@redhat.com> Cc: Stefan Hajnoczi <stefanha@redhat.com> Acked-by: David Gibson <david@gibson.dropbear.id.au> Acked-by: Greg Kurz <groug@kaod.org> Acked-by: Cornelia Huck <cohuck@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed by: Peter Chubb <peter.chubb@data61.csiro.au> Acked-by: Max Reitz <mreitz@redhat.com> Acked-by: Marcel Apfelbaum <marcel@redhat.com> Message-Id: <e1cfa2cd47087c248dd24caca9c33d9af0c499b0.1499866456.git.alistair.francis@xilinx.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2017-07-12 16:57:41 +03:00
warn_report("multiple floppy disk controllers with "
"iobase=0x3f0 have been found");
error_printf("the one being picked for CMOS setup might not reflect "
"your intent");
}
return state.floppy;
}
static void pc_cmos_init_late(PCMachineState *pcms)
{
X86MachineState *x86ms = X86_MACHINE(pcms);
MC146818RtcState *s = MC146818_RTC(x86ms->rtc);
int16_t cylinders;
int8_t heads, sectors;
int val;
int i, trans;
val = 0;
if (pcms->idebus[0] &&
ide_get_geometry(pcms->idebus[0], 0,
&cylinders, &heads, &sectors) >= 0) {
cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
val |= 0xf0;
}
if (pcms->idebus[0] &&
ide_get_geometry(pcms->idebus[0], 1,
&cylinders, &heads, &sectors) >= 0) {
cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
val |= 0x0f;
}
mc146818rtc_set_cmos_data(s, 0x12, val);
val = 0;
for (i = 0; i < 4; i++) {
/* NOTE: ide_get_geometry() returns the physical
geometry. It is always such that: 1 <= sects <= 63, 1
<= heads <= 16, 1 <= cylinders <= 16383. The BIOS
geometry can be different if a translation is done. */
BusState *idebus = pcms->idebus[i / 2];
if (idebus &&
ide_get_geometry(idebus, i % 2,
&cylinders, &heads, &sectors) >= 0) {
trans = ide_get_bios_chs_trans(idebus, i % 2) - 1;
assert((trans & ~3) == 0);
val |= trans << (i * 2);
}
}
mc146818rtc_set_cmos_data(s, 0x39, val);
pc_cmos_init_floppy(s, pc_find_fdc0());
/* various important CMOS locations needed by PC/Bochs bios */
/* memory size */
/* base memory (first MiB) */
val = MIN(x86ms->below_4g_mem_size / KiB, 640);
mc146818rtc_set_cmos_data(s, 0x15, val);
mc146818rtc_set_cmos_data(s, 0x16, val >> 8);
/* extended memory (next 64MiB) */
if (x86ms->below_4g_mem_size > 1 * MiB) {
val = (x86ms->below_4g_mem_size - 1 * MiB) / KiB;
} else {
val = 0;
}
if (val > 65535)
val = 65535;
mc146818rtc_set_cmos_data(s, 0x17, val);
mc146818rtc_set_cmos_data(s, 0x18, val >> 8);
mc146818rtc_set_cmos_data(s, 0x30, val);
mc146818rtc_set_cmos_data(s, 0x31, val >> 8);
/* memory between 16MiB and 4GiB */
if (x86ms->below_4g_mem_size > 16 * MiB) {
val = (x86ms->below_4g_mem_size - 16 * MiB) / (64 * KiB);
} else {
val = 0;
}
if (val > 65535)
val = 65535;
mc146818rtc_set_cmos_data(s, 0x34, val);
mc146818rtc_set_cmos_data(s, 0x35, val >> 8);
/* memory above 4GiB */
val = x86ms->above_4g_mem_size / 65536;
mc146818rtc_set_cmos_data(s, 0x5b, val);
mc146818rtc_set_cmos_data(s, 0x5c, val >> 8);
mc146818rtc_set_cmos_data(s, 0x5d, val >> 16);
val = 0;
val |= 0x02; /* FPU is there */
val |= 0x04; /* PS/2 mouse installed */
mc146818rtc_set_cmos_data(s, REG_EQUIPMENT_BYTE, val);
}
static void handle_a20_line_change(void *opaque, int irq, int level)
{
X86CPU *cpu = opaque;
/* XXX: send to all CPUs ? */
/* XXX: add logic to handle multiple A20 line sources */
x86_cpu_set_a20(cpu, level);
}
#define NE2000_NB_MAX 6
static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360,
0x280, 0x380 };
static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
static gboolean pc_init_ne2k_isa(ISABus *bus, NICInfo *nd, Error **errp)
{
static int nb_ne2k = 0;
if (nb_ne2k == NE2000_NB_MAX) {
error_setg(errp,
"maximum number of ISA NE2000 devices exceeded");
return false;
}
isa_ne2000_init(bus, ne2000_io[nb_ne2k],
ne2000_irq[nb_ne2k], nd);
nb_ne2k++;
return true;
}
void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
{
X86CPU *cpu = opaque;
if (level) {
cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
}
}
static
void pc_machine_done(Notifier *notifier, void *data)
{
PCMachineState *pcms = container_of(notifier,
PCMachineState, machine_done);
X86MachineState *x86ms = X86_MACHINE(pcms);
cxl_hook_up_pxb_registers(pcms->pcibus, &pcms->cxl_devices_state,
&error_fatal);
if (pcms->cxl_devices_state.is_enabled) {
cxl_fmws_link_targets(&pcms->cxl_devices_state, &error_fatal);
}
/* set the number of CPUs */
x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
fw_cfg_add_extra_pci_roots(pcms->pcibus, x86ms->fw_cfg);
acpi_setup();
if (x86ms->fw_cfg) {
fw_cfg_build_smbios(pcms, x86ms->fw_cfg, pcms->smbios_entry_point_type);
fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
/* update FW_CFG_NB_CPUS to account for -device added CPUs */
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
}
pc_cmos_init_late(pcms);
}
/* setup pci memory address space mapping into system address space */
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
MemoryRegion *pci_address_space)
{
/* Set to lower priority than RAM */
memory_region_add_subregion_overlap(system_memory, 0x0,
pci_address_space, -1);
}
void xen_load_linux(PCMachineState *pcms)
{
int i;
FWCfgState *fw_cfg;
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
X86MachineState *x86ms = X86_MACHINE(pcms);
assert(MACHINE(pcms)->kernel_filename != NULL);
fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4,
&address_space_memory);
fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
rom_set_fw(fw_cfg);
x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size,
pcmc->pvh_enabled);
for (i = 0; i < nb_option_roms; i++) {
assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
!strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
!strcmp(option_rom[i].name, "pvh.bin") ||
!strcmp(option_rom[i].name, "multiboot.bin") ||
!strcmp(option_rom[i].name, "multiboot_dma.bin"));
rom_add_option(option_rom[i].name, option_rom[i].bootindex);
}
x86ms->fw_cfg = fw_cfg;
}
#define PC_ROM_MIN_VGA 0xc0000
#define PC_ROM_MIN_OPTION 0xc8000
#define PC_ROM_MAX 0xe0000
#define PC_ROM_ALIGN 0x800
#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA)
static hwaddr pc_above_4g_end(PCMachineState *pcms)
{
X86MachineState *x86ms = X86_MACHINE(pcms);
if (pcms->sgx_epc.size != 0) {
return sgx_epc_above_4g_end(&pcms->sgx_epc);
}
return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
}
static void pc_get_device_memory_range(PCMachineState *pcms,
hwaddr *base,
ram_addr_t *device_mem_size)
{
MachineState *machine = MACHINE(pcms);
ram_addr_t size;
hwaddr addr;
size = machine->maxram_size - machine->ram_size;
addr = ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
/* size device region assuming 1G page max alignment per slot */
size += (1 * GiB) * machine->ram_slots;
*base = addr;
*device_mem_size = size;
}
static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
{
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineState *ms = MACHINE(pcms);
hwaddr cxl_base;
ram_addr_t size;
if (pcmc->has_reserved_memory &&
(ms->ram_size < ms->maxram_size)) {
pc_get_device_memory_range(pcms, &cxl_base, &size);
cxl_base += size;
} else {
cxl_base = pc_above_4g_end(pcms);
}
return cxl_base;
}
static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
{
uint64_t start = pc_get_cxl_range_start(pcms) + MiB;
if (pcms->cxl_devices_state.fixed_windows) {
GList *it;
start = ROUND_UP(start, 256 * MiB);
for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
CXLFixedWindow *fw = it->data;
start += fw->size;
}
}
return start;
}
static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
{
X86CPU *cpu = X86_CPU(first_cpu);
hw/i386/pc: improve physical address space bound check for 32-bit x86 systems 32-bit x86 systems do not have a reserved memory for hole64. On those 32-bit systems without PSE36 or PAE CPU features, hotplugging memory devices are not supported by QEMU as QEMU always places hotplugged memory above 4 GiB boundary which is beyond the physical address space of the processor. Linux guests also does not support memory hotplug on those systems. Please see Linux kernel commit b59d02ed08690 ("mm/memory_hotplug: disable the functionality for 32b") for more details. Therefore, the maximum limit of the guest physical address in the absence of additional memory devices effectively coincides with the end of "above 4G memory space" region for 32-bit x86 without PAE/PSE36. When users configure additional memory devices, after properly accounting for the additional device memory region to find the maximum value of the guest physical address, the address will be outside the range of the processor's physical address space. This change adds improvements to take above into consideration. For example, previously this was allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G With this change now it is no longer allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G qemu-system-x86_64: Address space limit 0xffffffff < 0x2bfffffff phys-bits too low (32) However, the following are allowed since on both cases physical address space of the processor is 36 bits: $ ./qemu-system-x86_64 -cpu pentium2 -m size=10G $ ./qemu-system-x86_64 -cpu pentium,pse36=on -m size=10G For 32-bit, without PAE/PSE36, hotplugging additional memory is no longer allowed. $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) $ ./qemu-system-i386 -machine q35 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) A new compatibility flag is introduced to make sure pc_max_used_gpa() keeps returning the old value for machines 8.1 and older. Therefore, the above is still allowed for older machine types in order to support compatibility. Hence, the following still works: $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2 $ ./qemu-system-i386 -machine pc-q35-8.1 -m size=1G,maxmem=3G,slots=2 Further, following is also allowed as with PSE36, the processor has 36-bit address space: $ ./qemu-system-i386 -cpu 486,pse36=on -m size=1G,maxmem=3G,slots=2 After calling CPUID with EAX=0x80000001, all AMD64 compliant processors have the longmode-capable-bit turned on in the extended feature flags (bit 29) in EDX. The absence of CPUID longmode can be used to differentiate between 32-bit and 64-bit processors and is the recommended approach. QEMU takes this approach elsewhere (for example, please see x86_cpu_realizefn()), With this change, pc_max_used_gpa() also uses the same method to detect 32-bit processors. Unit tests are modified to not run 32-bit x86 tests that use memory hotplug. Suggested-by: David Hildenbrand <david@redhat.com> Signed-off-by: Ani Sinha <anisinha@redhat.com> Reviewed-by: David Hildenbrand <david@redhat.com> Message-Id: <20230922160413.165702-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-09-22 19:04:13 +03:00
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineState *ms = MACHINE(pcms);
if (cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
/* 64-bit systems */
return pc_pci_hole64_start() + pci_hole64_size - 1;
}
hw/i386/pc: improve physical address space bound check for 32-bit x86 systems 32-bit x86 systems do not have a reserved memory for hole64. On those 32-bit systems without PSE36 or PAE CPU features, hotplugging memory devices are not supported by QEMU as QEMU always places hotplugged memory above 4 GiB boundary which is beyond the physical address space of the processor. Linux guests also does not support memory hotplug on those systems. Please see Linux kernel commit b59d02ed08690 ("mm/memory_hotplug: disable the functionality for 32b") for more details. Therefore, the maximum limit of the guest physical address in the absence of additional memory devices effectively coincides with the end of "above 4G memory space" region for 32-bit x86 without PAE/PSE36. When users configure additional memory devices, after properly accounting for the additional device memory region to find the maximum value of the guest physical address, the address will be outside the range of the processor's physical address space. This change adds improvements to take above into consideration. For example, previously this was allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G With this change now it is no longer allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G qemu-system-x86_64: Address space limit 0xffffffff < 0x2bfffffff phys-bits too low (32) However, the following are allowed since on both cases physical address space of the processor is 36 bits: $ ./qemu-system-x86_64 -cpu pentium2 -m size=10G $ ./qemu-system-x86_64 -cpu pentium,pse36=on -m size=10G For 32-bit, without PAE/PSE36, hotplugging additional memory is no longer allowed. $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) $ ./qemu-system-i386 -machine q35 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) A new compatibility flag is introduced to make sure pc_max_used_gpa() keeps returning the old value for machines 8.1 and older. Therefore, the above is still allowed for older machine types in order to support compatibility. Hence, the following still works: $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2 $ ./qemu-system-i386 -machine pc-q35-8.1 -m size=1G,maxmem=3G,slots=2 Further, following is also allowed as with PSE36, the processor has 36-bit address space: $ ./qemu-system-i386 -cpu 486,pse36=on -m size=1G,maxmem=3G,slots=2 After calling CPUID with EAX=0x80000001, all AMD64 compliant processors have the longmode-capable-bit turned on in the extended feature flags (bit 29) in EDX. The absence of CPUID longmode can be used to differentiate between 32-bit and 64-bit processors and is the recommended approach. QEMU takes this approach elsewhere (for example, please see x86_cpu_realizefn()), With this change, pc_max_used_gpa() also uses the same method to detect 32-bit processors. Unit tests are modified to not run 32-bit x86 tests that use memory hotplug. Suggested-by: David Hildenbrand <david@redhat.com> Signed-off-by: Ani Sinha <anisinha@redhat.com> Reviewed-by: David Hildenbrand <david@redhat.com> Message-Id: <20230922160413.165702-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-09-22 19:04:13 +03:00
/* 32-bit systems */
if (pcmc->broken_32bit_mem_addr_check) {
/* old value for compatibility reasons */
return ((hwaddr)1 << cpu->phys_bits) - 1;
}
hw/i386/pc: improve physical address space bound check for 32-bit x86 systems 32-bit x86 systems do not have a reserved memory for hole64. On those 32-bit systems without PSE36 or PAE CPU features, hotplugging memory devices are not supported by QEMU as QEMU always places hotplugged memory above 4 GiB boundary which is beyond the physical address space of the processor. Linux guests also does not support memory hotplug on those systems. Please see Linux kernel commit b59d02ed08690 ("mm/memory_hotplug: disable the functionality for 32b") for more details. Therefore, the maximum limit of the guest physical address in the absence of additional memory devices effectively coincides with the end of "above 4G memory space" region for 32-bit x86 without PAE/PSE36. When users configure additional memory devices, after properly accounting for the additional device memory region to find the maximum value of the guest physical address, the address will be outside the range of the processor's physical address space. This change adds improvements to take above into consideration. For example, previously this was allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G With this change now it is no longer allowed: $ ./qemu-system-x86_64 -cpu pentium -m size=10G qemu-system-x86_64: Address space limit 0xffffffff < 0x2bfffffff phys-bits too low (32) However, the following are allowed since on both cases physical address space of the processor is 36 bits: $ ./qemu-system-x86_64 -cpu pentium2 -m size=10G $ ./qemu-system-x86_64 -cpu pentium,pse36=on -m size=10G For 32-bit, without PAE/PSE36, hotplugging additional memory is no longer allowed. $ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) $ ./qemu-system-i386 -machine q35 -m size=1G,maxmem=3G,slots=2 qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32) A new compatibility flag is introduced to make sure pc_max_used_gpa() keeps returning the old value for machines 8.1 and older. Therefore, the above is still allowed for older machine types in order to support compatibility. Hence, the following still works: $ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2 $ ./qemu-system-i386 -machine pc-q35-8.1 -m size=1G,maxmem=3G,slots=2 Further, following is also allowed as with PSE36, the processor has 36-bit address space: $ ./qemu-system-i386 -cpu 486,pse36=on -m size=1G,maxmem=3G,slots=2 After calling CPUID with EAX=0x80000001, all AMD64 compliant processors have the longmode-capable-bit turned on in the extended feature flags (bit 29) in EDX. The absence of CPUID longmode can be used to differentiate between 32-bit and 64-bit processors and is the recommended approach. QEMU takes this approach elsewhere (for example, please see x86_cpu_realizefn()), With this change, pc_max_used_gpa() also uses the same method to detect 32-bit processors. Unit tests are modified to not run 32-bit x86 tests that use memory hotplug. Suggested-by: David Hildenbrand <david@redhat.com> Signed-off-by: Ani Sinha <anisinha@redhat.com> Reviewed-by: David Hildenbrand <david@redhat.com> Message-Id: <20230922160413.165702-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-09-22 19:04:13 +03:00
/*
* 32-bit systems don't have hole64 but they might have a region for
* memory devices. Even if additional hotplugged memory devices might
* not be usable by most guest OSes, we need to still consider them for
* calculating the highest possible GPA so that we can properly report
* if someone configures them on a CPU that cannot possibly address them.
*/
if (pcmc->has_reserved_memory &&
(ms->ram_size < ms->maxram_size)) {
hwaddr devmem_start;
ram_addr_t devmem_size;
pc_get_device_memory_range(pcms, &devmem_start, &devmem_size);
devmem_start += devmem_size;
return devmem_start - 1;
}
/* configuration without any memory hotplug */
return pc_above_4g_end(pcms) - 1;
}
i386/pc: relocate 4g start to 1T where applicable It is assumed that the whole GPA space is available to be DMA addressable, within a given address space limit, except for a tiny region before the 4G. Since Linux v5.4, VFIO validates whether the selected GPA is indeed valid i.e. not reserved by IOMMU on behalf of some specific devices or platform-defined restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with -EINVAL. AMD systems with an IOMMU are examples of such platforms and particularly may only have these ranges as allowed: 0000000000000000 - 00000000fedfffff (0 .. 3.982G) 00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G) 0000010000000000 - ffffffffffffffff (1Tb .. 16Pb[*]) We already account for the 4G hole, albeit if the guest is big enough we will fail to allocate a guest with >1010G due to the ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT). [*] there is another reserved region unrelated to HT that exists in the 256T boundary in Fam 17h according to Errata #1286, documeted also in "Open-Source Register Reference for AMD Family 17h Processors (PUB)" When creating the region above 4G, take into account that on AMD platforms the HyperTransport range is reserved and hence it cannot be used either as GPAs. On those cases rather than establishing the start of ram-above-4g to be 4G, relocate instead to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology", for more information on the underlying restriction of IOVAs. After accounting for the 1Tb hole on AMD hosts, mtree should look like: 0000000000000000-000000007fffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff 0000010000000000-000001ff7fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff If the relocation is done or the address space covers it, we also add the the reserved HT e820 range as reserved. Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough to address 1Tb (0xff ffff ffff). On AMD platforms, if a ram-above-4g relocation is attempted and the CPU wasn't configured with a big enough phys-bits, an error message will be printed due to the maxphysaddr vs maxusedaddr check previously added. Suggested-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> Acked-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <20220719170014.27028-11-joao.m.martins@oracle.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-07-19 20:00:13 +03:00
/*
* AMD systems with an IOMMU have an additional hole close to the
* 1Tb, which are special GPAs that cannot be DMA mapped. Depending
* on kernel version, VFIO may or may not let you DMA map those ranges.
* Starting Linux v5.4 we validate it, and can't create guests on AMD machines
* with certain memory sizes. It's also wrong to use those IOVA ranges
* in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse.
* The ranges reserved for Hyper-Transport are:
*
* FD_0000_0000h - FF_FFFF_FFFFh
*
* The ranges represent the following:
*
* Base Address Top Address Use
*
* FD_0000_0000h FD_F7FF_FFFFh Reserved interrupt address space
* FD_F800_0000h FD_F8FF_FFFFh Interrupt/EOI IntCtl
* FD_F900_0000h FD_F90F_FFFFh Legacy PIC IACK
* FD_F910_0000h FD_F91F_FFFFh System Management
* FD_F920_0000h FD_FAFF_FFFFh Reserved Page Tables
* FD_FB00_0000h FD_FBFF_FFFFh Address Translation
* FD_FC00_0000h FD_FDFF_FFFFh I/O Space
* FD_FE00_0000h FD_FFFF_FFFFh Configuration
* FE_0000_0000h FE_1FFF_FFFFh Extended Configuration/Device Messages
* FE_2000_0000h FF_FFFF_FFFFh Reserved
*
* See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
* Table 3: Special Address Controls (GPA) for more information.
*/
#define AMD_HT_START 0xfd00000000UL
#define AMD_HT_END 0xffffffffffUL
#define AMD_ABOVE_1TB_START (AMD_HT_END + 1)
#define AMD_HT_SIZE (AMD_ABOVE_1TB_START - AMD_HT_START)
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
uint64_t pci_hole64_size)
{
int linux_boot, i;
MemoryRegion *option_rom_mr;
MemoryRegion *ram_below_4g, *ram_above_4g;
FWCfgState *fw_cfg;
MachineState *machine = MACHINE(pcms);
MachineClass *mc = MACHINE_GET_CLASS(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
X86MachineState *x86ms = X86_MACHINE(pcms);
hwaddr maxphysaddr, maxusedaddr;
hwaddr cxl_base, cxl_resv_end = 0;
X86CPU *cpu = X86_CPU(first_cpu);
assert(machine->ram_size == x86ms->below_4g_mem_size +
x86ms->above_4g_mem_size);
linux_boot = (machine->kernel_filename != NULL);
i386/pc: relocate 4g start to 1T where applicable It is assumed that the whole GPA space is available to be DMA addressable, within a given address space limit, except for a tiny region before the 4G. Since Linux v5.4, VFIO validates whether the selected GPA is indeed valid i.e. not reserved by IOMMU on behalf of some specific devices or platform-defined restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with -EINVAL. AMD systems with an IOMMU are examples of such platforms and particularly may only have these ranges as allowed: 0000000000000000 - 00000000fedfffff (0 .. 3.982G) 00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G) 0000010000000000 - ffffffffffffffff (1Tb .. 16Pb[*]) We already account for the 4G hole, albeit if the guest is big enough we will fail to allocate a guest with >1010G due to the ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT). [*] there is another reserved region unrelated to HT that exists in the 256T boundary in Fam 17h according to Errata #1286, documeted also in "Open-Source Register Reference for AMD Family 17h Processors (PUB)" When creating the region above 4G, take into account that on AMD platforms the HyperTransport range is reserved and hence it cannot be used either as GPAs. On those cases rather than establishing the start of ram-above-4g to be 4G, relocate instead to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology", for more information on the underlying restriction of IOVAs. After accounting for the 1Tb hole on AMD hosts, mtree should look like: 0000000000000000-000000007fffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff 0000010000000000-000001ff7fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff If the relocation is done or the address space covers it, we also add the the reserved HT e820 range as reserved. Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough to address 1Tb (0xff ffff ffff). On AMD platforms, if a ram-above-4g relocation is attempted and the CPU wasn't configured with a big enough phys-bits, an error message will be printed due to the maxphysaddr vs maxusedaddr check previously added. Suggested-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> Acked-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <20220719170014.27028-11-joao.m.martins@oracle.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-07-19 20:00:13 +03:00
/*
* The HyperTransport range close to the 1T boundary is unique to AMD
* hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
* to above 1T to AMD vCPUs only. @enforce_amd_1tb_hole is only false in
* older machine types (<= 7.0) for compatibility purposes.
i386/pc: relocate 4g start to 1T where applicable It is assumed that the whole GPA space is available to be DMA addressable, within a given address space limit, except for a tiny region before the 4G. Since Linux v5.4, VFIO validates whether the selected GPA is indeed valid i.e. not reserved by IOMMU on behalf of some specific devices or platform-defined restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with -EINVAL. AMD systems with an IOMMU are examples of such platforms and particularly may only have these ranges as allowed: 0000000000000000 - 00000000fedfffff (0 .. 3.982G) 00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G) 0000010000000000 - ffffffffffffffff (1Tb .. 16Pb[*]) We already account for the 4G hole, albeit if the guest is big enough we will fail to allocate a guest with >1010G due to the ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT). [*] there is another reserved region unrelated to HT that exists in the 256T boundary in Fam 17h according to Errata #1286, documeted also in "Open-Source Register Reference for AMD Family 17h Processors (PUB)" When creating the region above 4G, take into account that on AMD platforms the HyperTransport range is reserved and hence it cannot be used either as GPAs. On those cases rather than establishing the start of ram-above-4g to be 4G, relocate instead to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology", for more information on the underlying restriction of IOVAs. After accounting for the 1Tb hole on AMD hosts, mtree should look like: 0000000000000000-000000007fffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff 0000010000000000-000001ff7fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff If the relocation is done or the address space covers it, we also add the the reserved HT e820 range as reserved. Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough to address 1Tb (0xff ffff ffff). On AMD platforms, if a ram-above-4g relocation is attempted and the CPU wasn't configured with a big enough phys-bits, an error message will be printed due to the maxphysaddr vs maxusedaddr check previously added. Suggested-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> Acked-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <20220719170014.27028-11-joao.m.martins@oracle.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-07-19 20:00:13 +03:00
*/
if (IS_AMD_CPU(&cpu->env) && pcmc->enforce_amd_1tb_hole) {
i386/pc: relocate 4g start to 1T where applicable It is assumed that the whole GPA space is available to be DMA addressable, within a given address space limit, except for a tiny region before the 4G. Since Linux v5.4, VFIO validates whether the selected GPA is indeed valid i.e. not reserved by IOMMU on behalf of some specific devices or platform-defined restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with -EINVAL. AMD systems with an IOMMU are examples of such platforms and particularly may only have these ranges as allowed: 0000000000000000 - 00000000fedfffff (0 .. 3.982G) 00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G) 0000010000000000 - ffffffffffffffff (1Tb .. 16Pb[*]) We already account for the 4G hole, albeit if the guest is big enough we will fail to allocate a guest with >1010G due to the ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT). [*] there is another reserved region unrelated to HT that exists in the 256T boundary in Fam 17h according to Errata #1286, documeted also in "Open-Source Register Reference for AMD Family 17h Processors (PUB)" When creating the region above 4G, take into account that on AMD platforms the HyperTransport range is reserved and hence it cannot be used either as GPAs. On those cases rather than establishing the start of ram-above-4g to be 4G, relocate instead to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology", for more information on the underlying restriction of IOVAs. After accounting for the 1Tb hole on AMD hosts, mtree should look like: 0000000000000000-000000007fffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff 0000010000000000-000001ff7fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff If the relocation is done or the address space covers it, we also add the the reserved HT e820 range as reserved. Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough to address 1Tb (0xff ffff ffff). On AMD platforms, if a ram-above-4g relocation is attempted and the CPU wasn't configured with a big enough phys-bits, an error message will be printed due to the maxphysaddr vs maxusedaddr check previously added. Suggested-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> Acked-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <20220719170014.27028-11-joao.m.martins@oracle.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-07-19 20:00:13 +03:00
/* Bail out if max possible address does not cross HT range */
if (pc_max_used_gpa(pcms, pci_hole64_size) >= AMD_HT_START) {
x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
}
/*
* Advertise the HT region if address space covers the reserved
* region or if we relocate.
*/
if (cpu->phys_bits >= 40) {
e820_add_entry(AMD_HT_START, AMD_HT_SIZE, E820_RESERVED);
}
}
/*
* phys-bits is required to be appropriately configured
* to make sure max used GPA is reachable.
*/
maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size);
maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1;
if (maxphysaddr < maxusedaddr) {
error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64
" phys-bits too low (%u)",
maxphysaddr, maxusedaddr, cpu->phys_bits);
exit(EXIT_FAILURE);
}
/*
* Split single memory region and use aliases to address portions of it,
* done for backwards compatibility with older qemus.
*/
ram_below_4g = g_malloc(sizeof(*ram_below_4g));
memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", machine->ram,
0, x86ms->below_4g_mem_size);
memory_region_add_subregion(system_memory, 0, ram_below_4g);
e820_add_entry(0, x86ms->below_4g_mem_size, E820_RAM);
if (x86ms->above_4g_mem_size > 0) {
ram_above_4g = g_malloc(sizeof(*ram_above_4g));
memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g",
machine->ram,
x86ms->below_4g_mem_size,
x86ms->above_4g_mem_size);
memory_region_add_subregion(system_memory, x86ms->above_4g_mem_start,
ram_above_4g);
e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size,
E820_RAM);
}
if (pcms->sgx_epc.size != 0) {
e820_add_entry(pcms->sgx_epc.base, pcms->sgx_epc.size, E820_RESERVED);
}
if (!pcmc->has_reserved_memory &&
(machine->ram_slots ||
(machine->maxram_size > machine->ram_size))) {
error_report("\"-memory 'slots|maxmem'\" is not supported by: %s",
mc->name);
exit(EXIT_FAILURE);
}
/* initialize device memory address space */
if (pcmc->has_reserved_memory &&
(machine->ram_size < machine->maxram_size)) {
ram_addr_t device_mem_size;
hwaddr device_mem_base;
if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
error_report("unsupported amount of memory slots: %"PRIu64,
machine->ram_slots);
exit(EXIT_FAILURE);
}
if (QEMU_ALIGN_UP(machine->maxram_size,
TARGET_PAGE_SIZE) != machine->maxram_size) {
error_report("maximum memory size must by aligned to multiple of "
"%d bytes", TARGET_PAGE_SIZE);
exit(EXIT_FAILURE);
}
pc_get_device_memory_range(pcms, &device_mem_base, &device_mem_size);
if (device_mem_base + device_mem_size < device_mem_size) {
error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT,
machine->maxram_size);
exit(EXIT_FAILURE);
}
machine_memory_devices_init(machine, device_mem_base, device_mem_size);
}
if (pcms->cxl_devices_state.is_enabled) {
MemoryRegion *mr = &pcms->cxl_devices_state.host_mr;
hwaddr cxl_size = MiB;
cxl_base = pc_get_cxl_range_start(pcms);
memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
memory_region_add_subregion(system_memory, cxl_base, mr);
cxl_resv_end = cxl_base + cxl_size;
if (pcms->cxl_devices_state.fixed_windows) {
hwaddr cxl_fmw_base;
GList *it;
cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB);
for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
CXLFixedWindow *fw = it->data;
fw->base = cxl_fmw_base;
memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw,
"cxl-fixed-memory-region", fw->size);
memory_region_add_subregion(system_memory, fw->base, &fw->mr);
cxl_fmw_base += fw->size;
cxl_resv_end = cxl_fmw_base;
}
}
}
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
if (machine_require_guest_memfd(machine)) {
memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
PC_ROM_SIZE, &error_fatal);
} else {
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
&error_fatal);
if (pcmc->pci_enabled) {
memory_region_set_readonly(option_rom_mr, true);
}
pc: make 'pc.rom' readonly when machine has PCI enabled looking at bios ROM mapping in QEMU it seems that only isapc (i.e. not PCI enabled machine) requires ROM being mapped as RW in other cases BIOS is mapped as RO. Do the same for option ROM 'pc.rom' when machine has PCI enabled. As useful side-effect pc.rom MemoryRegion stops being put in vhost memory map (filtered out by vhost_section()), which reduces number of entries by 1. Coincidentally it fixes migration failure reported in "[PATCH V2] vhost: fix a migration failed because of vhost region merge" where following destination CLI with /sys/module/vhost/parameters/max_mem_regions = 8 export DIMMSCOUNT=6 QEMU -enable-kvm \ -netdev type=tap,id=guest0,vhost=on,script=no,vhostforce \ -device virtio-net-pci,netdev=guest0 \ -m 256,slots=256,maxmem=2G \ `i=0; while [ $i -lt $DIMMSCOUNT ]; do echo \ "-object memory-backend-ram,id=m$i,size=128M \ -device pc-dimm,id=d$i,memdev=m$i"; i=$(($i + 1)); \ done` will fail to startup with error: "-device pc-dimm,id=d5,memdev=m5: a used vhost backend has no free memory slots left" while it's possible to add the 6th DIMM during hotplug on source. Issue is caused by the fact that number of entries in vhost map is bigger on 1 entry, when -device is processed, than after guest boots up, and that offending entry belongs to 'pc.rom', it's not like vhost intends to do IO in ROM range so making it RO hides region from vhost and makes number of entries in vhost memory map at -device/machine_done time match number of entries after guest boots. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Reported-by: Peng Hao <peng.hao2@zte.com.cn> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-07-28 12:09:05 +03:00
}
memory_region_add_subregion_overlap(rom_memory,
PC_ROM_MIN_VGA,
option_rom_mr,
1);
fw_cfg = fw_cfg_arch_create(machine,
x86ms->boot_cpus, x86ms->apic_id_limit);
rom_set_fw(fw_cfg);
if (machine->device_memory) {
uint64_t *val = g_malloc(sizeof(*val));
uint64_t res_mem_end = machine->device_memory->base;
if (!pcmc->broken_reserved_end) {
res_mem_end += memory_region_size(&machine->device_memory->mr);
}
if (pcms->cxl_devices_state.is_enabled) {
res_mem_end = cxl_resv_end;
}
*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
}
if (linux_boot) {
x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size,
pcmc->pvh_enabled);
}
for (i = 0; i < nb_option_roms; i++) {
rom_add_option(option_rom[i].name, option_rom[i].bootindex);
}
x86ms->fw_cfg = fw_cfg;
/* Init default IOAPIC address space */
x86ms->ioapic_as = &address_space_memory;
/* Init ACPI memory hotplug IO base address */
pcms->memhp_io_base = ACPI_MEMORY_HOTPLUG_BASE;
}
/*
* The 64bit pci hole starts after "above 4G RAM" and
* potentially the space reserved for memory hotplug.
*/
uint64_t pc_pci_hole64_start(void)
{
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineState *ms = MACHINE(pcms);
uint64_t hole64_start = 0;
ram_addr_t size = 0;
if (pcms->cxl_devices_state.is_enabled) {
hole64_start = pc_get_cxl_range_end(pcms);
} else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
pc_get_device_memory_range(pcms, &hole64_start, &size);
if (!pcmc->broken_reserved_end) {
hole64_start += size;
}
} else {
hole64_start = pc_above_4g_end(pcms);
}
return ROUND_UP(hole64_start, 1 * GiB);
}
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
{
DeviceState *dev = NULL;
rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA);
if (pci_bus) {
PCIDevice *pcidev = pci_vga_init(pci_bus);
dev = pcidev ? &pcidev->qdev : NULL;
} else if (isa_bus) {
ISADevice *isadev = isa_vga_init(isa_bus);
dev = isadev ? DEVICE(isadev) : NULL;
}
rom_reset_order_override();
return dev;
}
static const MemoryRegionOps ioport80_io_ops = {
.write = ioport80_write,
.read = ioport80_read,
.endianness = DEVICE_NATIVE_ENDIAN,
.impl = {
.min_access_size = 1,
.max_access_size = 1,
},
};
static const MemoryRegionOps ioportF0_io_ops = {
.write = ioportF0_write,
.read = ioportF0_read,
.endianness = DEVICE_NATIVE_ENDIAN,
.impl = {
.min_access_size = 1,
.max_access_size = 1,
},
};
static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl,
bool create_i8042, bool no_vmport)
{
int i;
DriveInfo *fd[MAX_FD];
qemu_irq *a20_line;
ISADevice *i8042, *port92, *vmmouse;
serial_hds_isa_init(isa_bus, 0, MAX_ISA_SERIAL_PORTS);
parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS);
for (i = 0; i < MAX_FD; i++) {
fd[i] = drive_get(IF_FLOPPY, 0, i);
create_fdctrl |= !!fd[i];
}
if (create_fdctrl) {
#ifdef CONFIG_FDC_ISA
ISADevice *fdc = isa_new(TYPE_ISA_FDC);
if (fdc) {
isa_realize_and_unref(fdc, isa_bus, &error_fatal);
isa_fdc_init_drives(fdc, fd);
}
#endif
}
if (!create_i8042) {
return;
}
i8042 = isa_create_simple(isa_bus, TYPE_I8042);
if (!no_vmport) {
isa_create_simple(isa_bus, TYPE_VMPORT);
vmmouse = isa_try_new("vmmouse");
} else {
vmmouse = NULL;
}
if (vmmouse) {
object_property_set_link(OBJECT(vmmouse), TYPE_I8042, OBJECT(i8042),
qom: Put name parameter before value / visitor parameter The object_property_set_FOO() setters take property name and value in an unusual order: void object_property_set_FOO(Object *obj, FOO_TYPE value, const char *name, Error **errp) Having to pass value before name feels grating. Swap them. Same for object_property_set(), object_property_get(), and object_property_parse(). Convert callers with this Coccinelle script: @@ identifier fun = { object_property_get, object_property_parse, object_property_set_str, object_property_set_link, object_property_set_bool, object_property_set_int, object_property_set_uint, object_property_set, object_property_set_qobject }; expression obj, v, name, errp; @@ - fun(obj, v, name, errp) + fun(obj, name, v, errp) Chokes on hw/arm/musicpal.c's lcd_refresh() with the unhelpful error message "no position information". Convert that one manually. Fails to convert hw/arm/armsse.c, because Coccinelle gets confused by ARMSSE being used both as typedef and function-like macro there. Convert manually. Fails to convert hw/rx/rx-gdbsim.c, because Coccinelle gets confused by RXCPU being used both as typedef and function-like macro there. Convert manually. The other files using RXCPU that way don't need conversion. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20200707160613.848843-27-armbru@redhat.com> [Straightforwad conflict with commit 2336172d9b "audio: set default value for pcspk.iobase property" resolved]
2020-07-07 19:05:54 +03:00
&error_abort);
isa_realize_and_unref(vmmouse, isa_bus, &error_fatal);
}
port92 = isa_create_simple(isa_bus, TYPE_PORT92);
a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
qdev_connect_gpio_out_named(DEVICE(i8042),
I8042_A20_LINE, 0, a20_line[0]);
qdev_connect_gpio_out_named(DEVICE(port92),
PORT92_A20_LINE, 0, a20_line[1]);
g_free(a20_line);
}
void pc_basic_device_init(struct PCMachineState *pcms,
ISABus *isa_bus, qemu_irq *gsi,
ISADevice *rtc_state,
bool create_fdctrl,
uint32_t hpet_irqs)
{
int i;
DeviceState *hpet = NULL;
int pit_isa_irq = 0;
qemu_irq pit_alt_irq = NULL;
ISADevice *pit = NULL;
MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
X86MachineState *x86ms = X86_MACHINE(pcms);
memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1);
memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, "ioportF0", 1);
memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io);
/*
* Check if an HPET shall be created.
*/
if (pcms->hpet_enabled) {
qemu_irq rtc_irq;
hpet = qdev_try_new(TYPE_HPET);
if (!hpet) {
error_report("couldn't create HPET device");
exit(1);
}
/*
* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-*,
* use IRQ16~23, IRQ8 and IRQ2. If the user has already set
* the property, use whatever mask they specified.
*/
uint8_t compat = object_property_get_uint(OBJECT(hpet),
HPET_INTCAP, NULL);
if (!compat) {
qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs);
}
sysbus_realize_and_unref(SYS_BUS_DEVICE(hpet), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE);
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]);
}
pit_isa_irq = -1;
pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT);
rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT);
/* overwrite connection created by south bridge */
qdev_connect_gpio_out(DEVICE(rtc_state), 0, rtc_irq);
}
object_property_add_alias(OBJECT(pcms), "rtc-time", OBJECT(rtc_state),
"date");
hw/xen: Support HVM_PARAM_CALLBACK_TYPE_GSI callback The GSI callback (and later PCI_INTX) is a level triggered interrupt. It is asserted when an event channel is delivered to vCPU0, and is supposed to be cleared when the vcpu_info->evtchn_upcall_pending field for vCPU0 is cleared again. Thankfully, Xen does *not* assert the GSI if the guest sets its own evtchn_upcall_pending field; we only need to assert the GSI when we have delivered an event for ourselves. So that's the easy part, kind of. There's a slight complexity in that we need to hold the BQL before we can call qemu_set_irq(), and we definitely can't do that while holding our own port_lock (because we'll need to take that from the qemu-side functions that the PV backend drivers will call). So if we end up wanting to set the IRQ in a context where we *don't* already hold the BQL, defer to a BH. However, we *do* need to poll for the evtchn_upcall_pending flag being cleared. In an ideal world we would poll that when the EOI happens on the PIC/IOAPIC. That's how it works in the kernel with the VFIO eventfd pairs — one is used to trigger the interrupt, and the other works in the other direction to 'resample' on EOI, and trigger the first eventfd again if the line is still active. However, QEMU doesn't seem to do that. Even VFIO level interrupts seem to be supported by temporarily unmapping the device's BARs from the guest when an interrupt happens, then trapping *all* MMIO to the device and sending the 'resample' event on *every* MMIO access until the IRQ is cleared! Maybe in future we'll plumb the 'resample' concept through QEMU's irq framework but for now we'll do what Xen itself does: just check the flag on every vmexit if the upcall GSI is known to be asserted. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Paul Durrant <paul@xen.org>
2022-12-15 23:35:24 +03:00
#ifdef CONFIG_XEN_EMU
if (xen_mode == XEN_EMULATE) {
hw/xen: Simplify emulated Xen platform init I initially put the basic platform init (overlay pages, grant tables, event channels) into mc->kvm_type because that was the earliest place that could sensibly test for xen_mode==XEN_EMULATE. The intent was to do this early enough that we could then initialise the XenBus and other parts which would have depended on them, from a generic location for both Xen and KVM/Xen in the PC-specific code, as seen in https://lore.kernel.org/qemu-devel/20230116221919.1124201-16-dwmw2@infradead.org/ However, then the Xen on Arm patches came along, and *they* wanted to do the XenBus init from a 'generic' Xen-specific location instead: https://lore.kernel.org/qemu-devel/20230210222729.957168-4-sstabellini@kernel.org/ Since there's no generic location that covers all three, I conceded to do it for XEN_EMULATE mode in pc_basic_devices_init(). And now there's absolutely no point in having some of the platform init done from pc_machine_kvm_type(); we can move it all up to live in a single place in pc_basic_devices_init(). This has the added benefit that we can drop the separate xen_evtchn_connect_gsis() function completely, and pass just the system GSIs in directly to xen_evtchn_create(). While I'm at it, it does no harm to explicitly pass in the *number* of said GSIs, because it does make me twitch a bit to pass an array of impicit size. During the lifetime of the KVM/Xen patchset, that had already changed (albeit just cosmetically) from GSI_NUM_PINS to IOAPIC_NUM_PINS. And document a bit better that this is for the *output* GSI for raising CPU0's events when the per-CPU vector isn't available. The fact that we create a whole set of them and then only waggle the one we're told to, instead of having a single output and only *connecting* it to the GSI that it should be connected to, is still non-intuitive for me. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Paul Durrant <paul@xen.org> Message-Id: <20230412185102.441523-2-dwmw2@infradead.org> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
2023-04-12 21:50:58 +03:00
xen_overlay_create();
xen_evtchn_create(IOAPIC_NUM_PINS, gsi);
xen_gnttab_create();
xen_xenstore_create();
if (pcms->pcibus) {
pci_create_simple(pcms->pcibus, -1, "xen-platform");
}
xen_bus_init();
hw/xen: Support HVM_PARAM_CALLBACK_TYPE_GSI callback The GSI callback (and later PCI_INTX) is a level triggered interrupt. It is asserted when an event channel is delivered to vCPU0, and is supposed to be cleared when the vcpu_info->evtchn_upcall_pending field for vCPU0 is cleared again. Thankfully, Xen does *not* assert the GSI if the guest sets its own evtchn_upcall_pending field; we only need to assert the GSI when we have delivered an event for ourselves. So that's the easy part, kind of. There's a slight complexity in that we need to hold the BQL before we can call qemu_set_irq(), and we definitely can't do that while holding our own port_lock (because we'll need to take that from the qemu-side functions that the PV backend drivers will call). So if we end up wanting to set the IRQ in a context where we *don't* already hold the BQL, defer to a BH. However, we *do* need to poll for the evtchn_upcall_pending flag being cleared. In an ideal world we would poll that when the EOI happens on the PIC/IOAPIC. That's how it works in the kernel with the VFIO eventfd pairs — one is used to trigger the interrupt, and the other works in the other direction to 'resample' on EOI, and trigger the first eventfd again if the line is still active. However, QEMU doesn't seem to do that. Even VFIO level interrupts seem to be supported by temporarily unmapping the device's BARs from the guest when an interrupt happens, then trapping *all* MMIO to the device and sending the 'resample' event on *every* MMIO access until the IRQ is cleared! Maybe in future we'll plumb the 'resample' concept through QEMU's irq framework but for now we'll do what Xen itself does: just check the flag on every vmexit if the upcall GSI is known to be asserted. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Paul Durrant <paul@xen.org>
2022-12-15 23:35:24 +03:00
}
#endif
qemu_register_boot_set(pc_boot_set, pcms);
set_boot_dev(pcms, MC146818_RTC(rtc_state),
MACHINE(pcms)->boot_config.order, &error_fatal);
if (!xen_enabled() &&
(x86ms->pit == ON_OFF_AUTO_AUTO || x86ms->pit == ON_OFF_AUTO_ON)) {
if (kvm_pit_in_kernel()) {
pit = kvm_pit_init(isa_bus, 0x40);
} else {
pit = i8254_pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq);
}
if (hpet) {
/* connect PIT to output control line of the HPET */
qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0));
}
object_property_set_link(OBJECT(pcms->pcspk), "pit",
OBJECT(pit), &error_fatal);
isa_realize_and_unref(pcms->pcspk, isa_bus, &error_fatal);
}
/* Super I/O */
pc_superio_init(isa_bus, create_fdctrl, pcms->i8042_enabled,
pcms->vmport != ON_OFF_AUTO_ON);
}
void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
{
MachineClass *mc = MACHINE_CLASS(pcmc);
bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000);
NICInfo *nd;
rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC);
while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) {
pc_init_ne2k_isa(isa_bus, nd, &error_fatal);
}
/* Anything remaining should be a PCI NIC */
pci_init_nic_devices(pci_bus, mc->default_nic);
rom_reset_order_override();
}
void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
{
qemu_irq *i8259;
if (kvm_pic_in_kernel()) {
i8259 = kvm_i8259_init(isa_bus);
} else if (xen_enabled()) {
i8259 = xen_interrupt_controller_init();
} else {
i8259 = i8259_init(isa_bus, x86_allocate_cpu_irq());
}
for (size_t i = 0; i < ISA_NUM_IRQS; i++) {
i8259_irqs[i] = i8259[i];
}
g_free(i8259);
}
static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
const X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
const MachineState *ms = MACHINE(hotplug_dev);
const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
Error *local_err = NULL;
/*
* When "acpi=off" is used with the Q35 machine type, no ACPI is built,
* but pcms->acpi_dev is still created. Check !acpi_enabled in
* addition to cover this case.
*/
if (!x86ms->acpi_dev || !x86_machine_is_acpi_enabled(x86ms)) {
error_setg(errp,
"memory hotplug is not enabled: missing acpi device or acpi disabled");
return;
}
if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
return;
}
hotplug_handler_pre_plug(x86ms->acpi_dev, dev, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), errp);
}
static void pc_memory_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
MachineState *ms = MACHINE(hotplug_dev);
hw/i386: fix nvdimm check error path Commit e987c37aee1752177906847630d32477da57e705 ("hw/i386: check if nvdimm is enabled before plugging") introduced a check to reject nvdimm hotplug if -machine pc,nvdimm=on was not given. This check executes after pc_dimm_memory_plug() has already completed and does not reverse the effect of this function in the case of failure. Perform the check before calling pc_dimm_memory_plug(). This fixes the following abort: $ qemu -M accel=kvm -m 1G,slots=4,maxmem=8G \ -object memory-backend-file,id=mem1,share=on,mem-path=nvdimm.dat,size=1G (qemu) device_add nvdimm,memdev=mem1 nvdimm is not enabled: missing 'nvdimm' in '-M' (qemu) device_add nvdimm,memdev=mem1 Core dumped The backtrace is: #0 0x00007fffdb5b191f in raise () at /lib64/libc.so.6 #1 0x00007fffdb5b351a in abort () at /lib64/libc.so.6 #2 0x00007fffdb5a9da7 in __assert_fail_base () at /lib64/libc.so.6 #3 0x00007fffdb5a9e52 in () at /lib64/libc.so.6 #4 0x000055555577a5fa in qemu_ram_set_idstr (new_block=0x555556747a00, name=<optimized out>, dev=dev@entry=0x555556705590) at qemu/exec.c:1709 #5 0x0000555555a0fe86 in vmstate_register_ram (mr=mr@entry=0x55555673a0e0, dev=dev@entry=0x555556705590) at migration/savevm.c:2293 #6 0x0000555555965088 in pc_dimm_memory_plug (dev=dev@entry=0x555556705590, hpms=hpms@entry=0x5555566bb0e0, mr=mr@entry=0x555556705630, align=<optimized out>, errp=errp@entry=0x7fffffffc660) at hw/mem/pc-dimm.c:110 #7 0x000055555581d89b in pc_dimm_plug (errp=0x7fffffffc6c0, dev=0x555556705590, hotplug_dev=<optimized out>) at qemu/hw/i386/pc.c:1713 #8 0x000055555581d89b in pc_machine_device_plug_cb (hotplug_dev=<optimized out>, dev=0x555556705590, errp=0x7fffffffc6c0) at qemu/hw/i386/pc.c:2004 #9 0x0000555555914da6 in device_set_realized (obj=<optimized out>, value=<optimized out>, errp=0x7fffffffc7e8) at hw/core/qdev.c:926 Cc: Haozhong Zhang <haozhong.zhang@intel.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: Haozhong Zhang <haozhong.zhang@intel.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 18:16:15 +03:00
bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
pc_dimm_plug(PC_DIMM(dev), MACHINE(pcms));
hw/i386: fix nvdimm check error path Commit e987c37aee1752177906847630d32477da57e705 ("hw/i386: check if nvdimm is enabled before plugging") introduced a check to reject nvdimm hotplug if -machine pc,nvdimm=on was not given. This check executes after pc_dimm_memory_plug() has already completed and does not reverse the effect of this function in the case of failure. Perform the check before calling pc_dimm_memory_plug(). This fixes the following abort: $ qemu -M accel=kvm -m 1G,slots=4,maxmem=8G \ -object memory-backend-file,id=mem1,share=on,mem-path=nvdimm.dat,size=1G (qemu) device_add nvdimm,memdev=mem1 nvdimm is not enabled: missing 'nvdimm' in '-M' (qemu) device_add nvdimm,memdev=mem1 Core dumped The backtrace is: #0 0x00007fffdb5b191f in raise () at /lib64/libc.so.6 #1 0x00007fffdb5b351a in abort () at /lib64/libc.so.6 #2 0x00007fffdb5a9da7 in __assert_fail_base () at /lib64/libc.so.6 #3 0x00007fffdb5a9e52 in () at /lib64/libc.so.6 #4 0x000055555577a5fa in qemu_ram_set_idstr (new_block=0x555556747a00, name=<optimized out>, dev=dev@entry=0x555556705590) at qemu/exec.c:1709 #5 0x0000555555a0fe86 in vmstate_register_ram (mr=mr@entry=0x55555673a0e0, dev=dev@entry=0x555556705590) at migration/savevm.c:2293 #6 0x0000555555965088 in pc_dimm_memory_plug (dev=dev@entry=0x555556705590, hpms=hpms@entry=0x5555566bb0e0, mr=mr@entry=0x555556705630, align=<optimized out>, errp=errp@entry=0x7fffffffc660) at hw/mem/pc-dimm.c:110 #7 0x000055555581d89b in pc_dimm_plug (errp=0x7fffffffc6c0, dev=0x555556705590, hotplug_dev=<optimized out>) at qemu/hw/i386/pc.c:1713 #8 0x000055555581d89b in pc_machine_device_plug_cb (hotplug_dev=<optimized out>, dev=0x555556705590, errp=0x7fffffffc6c0) at qemu/hw/i386/pc.c:2004 #9 0x0000555555914da6 in device_set_realized (obj=<optimized out>, value=<optimized out>, errp=0x7fffffffc7e8) at hw/core/qdev.c:926 Cc: Haozhong Zhang <haozhong.zhang@intel.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: Haozhong Zhang <haozhong.zhang@intel.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-06-09 18:16:15 +03:00
if (is_nvdimm) {
nvdimm_plug(ms->nvdimms_state);
}
hotplug_handler_plug(x86ms->acpi_dev, dev, &error_abort);
}
static void pc_memory_unplug_request(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
/*
* When "acpi=off" is used with the Q35 machine type, no ACPI is built,
* but pcms->acpi_dev is still created. Check !acpi_enabled in
* addition to cover this case.
*/
if (!x86ms->acpi_dev || !x86_machine_is_acpi_enabled(x86ms)) {
error_setg(errp,
"memory hotplug is not enabled: missing acpi device or acpi disabled");
return;
}
if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
error_setg(errp, "nvdimm device hot unplug is not supported yet.");
return;
}
hotplug_handler_unplug_request(x86ms->acpi_dev, dev,
errp);
}
static void pc_memory_unplug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
Error *local_err = NULL;
hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err);
if (local_err) {
goto out;
}
pc_dimm_unplug(PC_DIMM(dev), MACHINE(pcms));
qdev_unrealize(dev);
out:
error_propagate(errp, local_err);
}
static void pc_hv_balloon_pre_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
/* The vmbus handler has no hotplug handler; we should never end up here. */
g_assert(!dev->hotplugged);
memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), errp);
}
static void pc_hv_balloon_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
}
static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
pc_memory_pre_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
x86_cpu_pre_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
/* Declare the APIC range as the reserved MSI region */
char *resv_prop_str = g_strdup_printf("0xfee00000:0xfeefffff:%d",
VIRTIO_IOMMU_RESV_MEM_T_MSI);
QList *reserved_regions = qlist_new();
qlist_append_str(reserved_regions, resv_prop_str);
qdev_prop_set_array(dev, "reserved-regions", reserved_regions);
g_free(resv_prop_str);
}
if (object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE) ||
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
if (pcms->iommu) {
error_setg(errp, "QEMU does not support multiple vIOMMUs "
"for x86 yet.");
return;
}
pcms->iommu = dev;
} else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) {
pc_hv_balloon_pre_plug(hotplug_dev, dev, errp);
}
}
static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
pc_memory_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
x86_cpu_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) {
pc_hv_balloon_plug(hotplug_dev, dev, errp);
}
}
static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
pc_memory_unplug_request(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
x86_cpu_unplug_request_cb(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev),
errp);
} else {
error_setg(errp, "acpi: device unplug request for not supported device"
" type: %s", object_get_typename(OBJECT(dev)));
}
}
static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
pc_memory_unplug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
x86_cpu_unplug_cb(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
} else {
error_setg(errp, "acpi: device unplug for not supported device"
" type: %s", object_get_typename(OBJECT(dev)));
}
}
static HotplugHandler *pc_get_hotplug_handler(MachineState *machine,
DeviceState *dev)
{
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) ||
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON) ||
object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) {
return HOTPLUG_HANDLER(machine);
}
return NULL;
}
static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
OnOffAuto vmport = pcms->vmport;
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:54 +03:00
visit_type_OnOffAuto(v, name, &vmport, errp);
}
static void pc_machine_set_vmport(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:54 +03:00
visit_type_OnOffAuto(v, name, &pcms->vmport, errp);
}
static bool pc_machine_get_fd_bootchk(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
return pcms->fd_bootchk;
}
static void pc_machine_set_fd_bootchk(Object *obj, bool value, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->fd_bootchk = value;
}
static bool pc_machine_get_smbus(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
return pcms->smbus_enabled;
}
static void pc_machine_set_smbus(Object *obj, bool value, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->smbus_enabled = value;
}
static bool pc_machine_get_sata(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
return pcms->sata_enabled;
}
static void pc_machine_set_sata(Object *obj, bool value, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->sata_enabled = value;
}
static bool pc_machine_get_hpet(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
return pcms->hpet_enabled;
}
static void pc_machine_set_hpet(Object *obj, bool value, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->hpet_enabled = value;
}
static bool pc_machine_get_i8042(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
return pcms->i8042_enabled;
}
static void pc_machine_set_i8042(Object *obj, bool value, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->i8042_enabled = value;
}
static bool pc_machine_get_default_bus_bypass_iommu(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
return pcms->default_bus_bypass_iommu;
}
static void pc_machine_set_default_bus_bypass_iommu(Object *obj, bool value,
Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
pcms->default_bus_bypass_iommu = value;
}
static void pc_machine_get_smbios_ep(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
SmbiosEntryPointType smbios_entry_point_type = pcms->smbios_entry_point_type;
visit_type_SmbiosEntryPointType(v, name, &smbios_entry_point_type, errp);
}
static void pc_machine_set_smbios_ep(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
visit_type_SmbiosEntryPointType(v, name, &pcms->smbios_entry_point_type, errp);
}
static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
uint64_t value = pcms->max_ram_below_4g;
visit_type_size(v, name, &value, errp);
}
static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
uint64_t value;
error: Eliminate error_propagate() with Coccinelle, part 1 When all we do with an Error we receive into a local variable is propagating to somewhere else, we can just as well receive it there right away. Convert if (!foo(..., &err)) { ... error_propagate(errp, err); ... return ... } to if (!foo(..., errp)) { ... ... return ... } where nothing else needs @err. Coccinelle script: @rule1 forall@ identifier fun, err, errp, lbl; expression list args, args2; binary operator op; constant c1, c2; symbol false; @@ if ( ( - fun(args, &err, args2) + fun(args, errp, args2) | - !fun(args, &err, args2) + !fun(args, errp, args2) | - fun(args, &err, args2) op c1 + fun(args, errp, args2) op c1 ) ) { ... when != err when != lbl: when strict - error_propagate(errp, err); ... when != err ( return; | return c2; | return false; ) } @rule2 forall@ identifier fun, err, errp, lbl; expression list args, args2; expression var; binary operator op; constant c1, c2; symbol false; @@ - var = fun(args, &err, args2); + var = fun(args, errp, args2); ... when != err if ( ( var | !var | var op c1 ) ) { ... when != err when != lbl: when strict - error_propagate(errp, err); ... when != err ( return; | return c2; | return false; | return var; ) } @depends on rule1 || rule2@ identifier err; @@ - Error *err = NULL; ... when != err Not exactly elegant, I'm afraid. The "when != lbl:" is necessary to avoid transforming if (fun(args, &err)) { goto out } ... out: error_propagate(errp, err); even though other paths to label out still need the error_propagate(). For an actual example, see sclp_realize(). Without the "when strict", Coccinelle transforms vfio_msix_setup(), incorrectly. I don't know what exactly "when strict" does, only that it helps here. The match of return is narrower than what I want, but I can't figure out how to express "return where the operand doesn't use @err". For an example where it's too narrow, see vfio_intx_enable(). Silently fails to convert hw/arm/armsse.c, because Coccinelle gets confused by ARMSSE being used both as typedef and function-like macro there. Converted manually. Line breaks tidied up manually. One nested declaration of @local_err deleted manually. Preexisting unwanted blank line dropped in hw/riscv/sifive_e.c. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Message-Id: <20200707160613.848843-35-armbru@redhat.com>
2020-07-07 19:06:02 +03:00
if (!visit_type_size(v, name, &value, errp)) {
return;
}
if (value > 4 * GiB) {
error_setg(errp,
"Machine option 'max-ram-below-4g=%"PRIu64
"' expects size less than or equal to 4G", value);
return;
}
if (value < 1 * MiB) {
warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary,"
"BIOS may not work with less than 1MiB", value);
}
pcms->max_ram_below_4g = value;
}
static void pc_machine_get_max_fw_size(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
uint64_t value = pcms->max_fw_size;
visit_type_size(v, name, &value, errp);
}
static void pc_machine_set_max_fw_size(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
uint64_t value;
if (!visit_type_size(v, name, &value, errp)) {
return;
}
/*
* We don't have a theoretically justifiable exact lower bound on the base
* address of any flash mapping. In practice, the IO-APIC MMIO range is
* [0xFEE00000..0xFEE01000] -- see IO_APIC_DEFAULT_ADDRESS --, leaving free
* only 18MiB-4KiB below 4GiB. For now, restrict the cumulative mapping to
* 16MiB in size.
*/
if (value > 16 * MiB) {
error_setg(errp,
"User specified max allowed firmware size %" PRIu64 " is "
"greater than 16MiB. If combined firmware size exceeds "
"16MiB the system may not boot, or experience intermittent"
"stability issues.",
value);
return;
}
pcms->max_fw_size = value;
}
static void pc_machine_initfn(Object *obj)
{
PCMachineState *pcms = PC_MACHINE(obj);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
#ifdef CONFIG_VMPORT
pcms->vmport = ON_OFF_AUTO_AUTO;
#else
pcms->vmport = ON_OFF_AUTO_OFF;
#endif /* CONFIG_VMPORT */
pcms->max_ram_below_4g = 0; /* use default */
pcms->smbios_entry_point_type = pcmc->default_smbios_ep_type;
pcms->south_bridge = pcmc->default_south_bridge;
/* acpi build is enabled by default if machine supports it */
pcms->acpi_build_enabled = pcmc->has_acpi_build;
pcms->smbus_enabled = true;
pcms->sata_enabled = true;
pcms->i8042_enabled = true;
pcms->max_fw_size = 8 * MiB;
#ifdef CONFIG_HPET
pcms->hpet_enabled = true;
#endif
pcms->fd_bootchk = true;
pcms->default_bus_bypass_iommu = false;
pc: Support firmware configuration with -blockdev The PC machines put firmware in ROM by default. To get it put into flash memory (required by OVMF), you have to use -drive if=pflash,unit=0,... and optionally -drive if=pflash,unit=1,... Why two -drive? This permits setting up one part of the flash memory read-only, and the other part read/write. It also makes upgrading firmware on the host easier. Below the hood, it creates two separate flash devices, because we were too lazy to improve our flash device models to support sector protection. The problem at hand is to do the same with -blockdev somehow, as one more step towards deprecating -drive. Mapping -drive if=none,... to -blockdev is a solved problem. With if=T other than if=none, -drive additionally configures a block device frontend. For non-onboard devices, that part maps to -device. Also a solved problem. For onboard devices such as PC flash memory, we have an unsolved problem. This is actually an instance of a wider problem: our general device configuration interface doesn't cover onboard devices. Instead, we have a zoo of ad hoc interfaces that are much more limited. One of them is -drive, which we'd rather deprecate, but can't until we have suitable replacements for all its uses. Sadly, I can't attack the wider problem today. So back to the narrow problem. My first idea was to reduce it to its solved buddy by using pluggable instead of onboard devices for the flash memory. Workable, but it requires some extra smarts in firmware descriptors and libvirt. Paolo had an idea that is simpler for libvirt: keep the devices onboard, and add machine properties for their block backends. The implementation is less than straightforward, I'm afraid. First, block backend properties are *qdev* properties. Machines can't have those, as they're not devices. I could duplicate these qdev properties as QOM properties, but I hate that. More seriously, the properties do not belong to the machine, they belong to the onboard flash devices. Adding them to the machine would then require bad magic to somehow transfer them to the flash devices. Fortunately, QOM provides the means to handle exactly this case: add alias properties to the machine that forward to the onboard devices' properties. Properties need to be created in .instance_init() methods. For PC machines, that's pc_machine_initfn(). To make alias properties work, we need to create the onboard flash devices there, too. Requires several bug fixes, in the previous commits. We also have to realize the devices. More on that below. If the user sets pflash0, firmware resides in flash memory. pc_system_firmware_init() maps and realizes the flash devices. Else, firmware resides in ROM. The onboard flash devices aren't used then. pc_system_firmware_init() destroys them unrealized, along with the alias properties. The existing code to pick up drives defined with -drive if=pflash is replaced by code to desugar into the machine properties. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Acked-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Message-Id: <87ftrtux81.fsf@dusky.pond.sub.org>
2019-03-11 20:39:26 +03:00
pc_system_flash_create(pcms);
pcms->pcspk = isa_new(TYPE_PC_SPEAKER);
object_property_add_alias(OBJECT(pcms), "pcspk-audiodev",
OBJECT(pcms->pcspk), "audiodev");
if (pcmc->pci_enabled) {
cxl_machine_init(obj, &pcms->cxl_devices_state);
}
pcms->machine_done.notify = pc_machine_done;
qemu_add_machine_init_done_notifier(&pcms->machine_done);
}
static void pc_machine_reset(MachineState *machine, ShutdownCause reason)
{
CPUState *cs;
X86CPU *cpu;
qemu_devices_reset(reason);
/* Reset APIC after devices have been reset to cancel
* any changes that qemu_devices_reset() might have done.
*/
CPU_FOREACH(cs) {
cpu = X86_CPU(cs);
x86_cpu_after_reset(cpu);
}
}
static void pc_machine_wakeup(MachineState *machine)
{
cpu_synchronize_all_states();
pc_machine_reset(machine, SHUTDOWN_CAUSE_NONE);
cpu_synchronize_all_post_reset();
}
static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
{
X86IOMMUState *iommu = x86_iommu_get_default();
IntelIOMMUState *intel_iommu;
if (iommu &&
object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&
object_dynamic_cast((Object *)dev, "vfio-pci")) {
intel_iommu = INTEL_IOMMU_DEVICE(iommu);
if (!intel_iommu->caching_mode) {
error_setg(errp, "Device assignment is not allowed without "
"enabling caching-mode=on for Intel IOMMU.");
return false;
}
}
return true;
}
static void pc_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
X86MachineClass *x86mc = X86_MACHINE_CLASS(oc);
PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
pcmc->pci_enabled = true;
pcmc->has_acpi_build = true;
pcmc->smbios_defaults = true;
pcmc->gigabyte_align = true;
pcmc->has_reserved_memory = true;
pcmc->enforce_amd_1tb_hole = true;
hw/i386/pc_sysfw: Alias rather than copy isa-bios region In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped to the top of the 4G memory boundary. Do the same in the -pflash case, but only for new machine versions for migration compatibility. This establishes common behavior and makes pflash commands work in the "isa-bios" region which some real-world legacy bioses rely on. Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash case will now also point to encrypted memory, just like it already does in the -bios case. When running `info mtree` before and after this commit with `qemu-system-x86_64 -S -drive \ if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running `diff -u before.mtree after.mtree` results in the following changes in the memory tree: --- before.mtree +++ after.mtree @@ -71,7 +71,7 @@ 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff @@ -108,7 +108,7 @@ 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff @@ -131,11 +131,14 @@ memory-region: pc.ram 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram +memory-region: system.flash0 + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 + memory-region: pci 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff memory-region: smram 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff Note that in both cases the "system" memory region contains the entry 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 but the "system.flash0" memory region only appears standalone when "isa-bios" is an alias. Signed-off-by: Bernhard Beschow <shentey@gmail.com> Message-ID: <20240508175507.22270-7-shentey@gmail.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2024-05-08 20:55:07 +03:00
pcmc->isa_bios_alias = true;
/* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
* to be used at the moment, 32K should be enough for a while. */
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->pvh_enabled = true;
pcmc->kvmclock_create_always = true;
x86mc->apic_xrupt_override = true;
assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->hotplug_allowed = pc_hotplug_allowed;
NUMA: Enable adding NUMA node implicitly Linux and Windows need ACPI SRAT table to make memory hotplug work properly, however currently QEMU doesn't create SRAT table if numa options aren't present on CLI. Which breaks both linux and windows guests in certain conditions: * Windows: won't enable memory hotplug without SRAT table at all * Linux: if QEMU is started with initial memory all below 4Gb and no SRAT table present, guest kernel will use nommu DMA ops, which breaks 32bit hw drivers when memory is hotplugged and guest tries to use it with that drivers. Fix above issues by automatically creating a numa node when QEMU is started with memory hotplug enabled but without '-numa' options on CLI. (PS: auto-create numa node only for new machine types so not to break migration). Which would provide SRAT table to guests without explicit -numa options on CLI and would allow: * Windows: to enable memory hotplug * Linux: switch to SWIOTLB DMA ops, to bounce DMA transfers to 32bit allocated buffers that legacy drivers/hw can handle. [Rewritten by Igor] Reported-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> Suggested-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Richard Henderson <rth@twiddle.net> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Thomas Huth <thuth@redhat.com> Cc: Alistair Francis <alistair23@gmail.com> Cc: Takao Indoh <indou.takao@jp.fujitsu.com> Cc: Izumi Taku <izumi.taku@jp.fujitsu.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-11-14 05:34:01 +03:00
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
mc->has_hotpluggable_cpus = true;
mc->default_boot_order = "cad";
mc->block_default_type = IF_IDE;
mc->max_cpus = 255;
mc->reset = pc_machine_reset;
mc->wakeup = pc_machine_wakeup;
hc->pre_plug = pc_machine_device_pre_plug_cb;
hc->plug = pc_machine_device_plug_cb;
hc->unplug_request = pc_machine_device_unplug_request_cb;
hc->unplug = pc_machine_device_unplug_cb;
mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE;
mc->nvdimm_supported = true;
mc->smp_props.dies_supported = true;
mc->smp_props.modules_supported = true;
mc->default_ram_id = "pc.ram";
pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO;
object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g,
NULL, NULL);
object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G,
"Maximum ram below the 4G boundary (32bit boundary)");
object_class_property_add(oc, PC_MACHINE_VMPORT, "OnOffAuto",
pc_machine_get_vmport, pc_machine_set_vmport,
qom: Drop parameter @errp of object_property_add() & friends The only way object_property_add() can fail is when a property with the same name already exists. Since our property names are all hardcoded, failure is a programming error, and the appropriate way to handle it is passing &error_abort. Same for its variants, except for object_property_add_child(), which additionally fails when the child already has a parent. Parentage is also under program control, so this is a programming error, too. We have a bit over 500 callers. Almost half of them pass &error_abort, slightly fewer ignore errors, one test case handles errors, and the remaining few callers pass them to their own callers. The previous few commits demonstrated once again that ignoring programming errors is a bad idea. Of the few ones that pass on errors, several violate the Error API. The Error ** argument must be NULL, &error_abort, &error_fatal, or a pointer to a variable containing NULL. Passing an argument of the latter kind twice without clearing it in between is wrong: if the first call sets an error, it no longer points to NULL for the second call. ich9_pm_add_properties(), sparc32_ledma_realize(), sparc32_dma_realize(), xilinx_axidma_realize(), xilinx_enet_realize() are wrong that way. When the one appropriate choice of argument is &error_abort, letting users pick the argument is a bad idea. Drop parameter @errp and assert the preconditions instead. There's one exception to "duplicate property name is a programming error": the way object_property_add() implements the magic (and undocumented) "automatic arrayification". Don't drop @errp there. Instead, rename object_property_add() to object_property_try_add(), and add the obvious wrapper object_property_add(). Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200505152926.18877-15-armbru@redhat.com> [Two semantic rebase conflicts resolved]
2020-05-05 18:29:22 +03:00
NULL, NULL);
object_class_property_set_description(oc, PC_MACHINE_VMPORT,
"Enable vmport (pc & q35)");
object_class_property_add_bool(oc, PC_MACHINE_SMBUS,
qom: Drop parameter @errp of object_property_add() & friends The only way object_property_add() can fail is when a property with the same name already exists. Since our property names are all hardcoded, failure is a programming error, and the appropriate way to handle it is passing &error_abort. Same for its variants, except for object_property_add_child(), which additionally fails when the child already has a parent. Parentage is also under program control, so this is a programming error, too. We have a bit over 500 callers. Almost half of them pass &error_abort, slightly fewer ignore errors, one test case handles errors, and the remaining few callers pass them to their own callers. The previous few commits demonstrated once again that ignoring programming errors is a bad idea. Of the few ones that pass on errors, several violate the Error API. The Error ** argument must be NULL, &error_abort, &error_fatal, or a pointer to a variable containing NULL. Passing an argument of the latter kind twice without clearing it in between is wrong: if the first call sets an error, it no longer points to NULL for the second call. ich9_pm_add_properties(), sparc32_ledma_realize(), sparc32_dma_realize(), xilinx_axidma_realize(), xilinx_enet_realize() are wrong that way. When the one appropriate choice of argument is &error_abort, letting users pick the argument is a bad idea. Drop parameter @errp and assert the preconditions instead. There's one exception to "duplicate property name is a programming error": the way object_property_add() implements the magic (and undocumented) "automatic arrayification". Don't drop @errp there. Instead, rename object_property_add() to object_property_try_add(), and add the obvious wrapper object_property_add(). Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200505152926.18877-15-armbru@redhat.com> [Two semantic rebase conflicts resolved]
2020-05-05 18:29:22 +03:00
pc_machine_get_smbus, pc_machine_set_smbus);
object_class_property_set_description(oc, PC_MACHINE_SMBUS,
"Enable/disable system management bus");
object_class_property_add_bool(oc, PC_MACHINE_SATA,
qom: Drop parameter @errp of object_property_add() & friends The only way object_property_add() can fail is when a property with the same name already exists. Since our property names are all hardcoded, failure is a programming error, and the appropriate way to handle it is passing &error_abort. Same for its variants, except for object_property_add_child(), which additionally fails when the child already has a parent. Parentage is also under program control, so this is a programming error, too. We have a bit over 500 callers. Almost half of them pass &error_abort, slightly fewer ignore errors, one test case handles errors, and the remaining few callers pass them to their own callers. The previous few commits demonstrated once again that ignoring programming errors is a bad idea. Of the few ones that pass on errors, several violate the Error API. The Error ** argument must be NULL, &error_abort, &error_fatal, or a pointer to a variable containing NULL. Passing an argument of the latter kind twice without clearing it in between is wrong: if the first call sets an error, it no longer points to NULL for the second call. ich9_pm_add_properties(), sparc32_ledma_realize(), sparc32_dma_realize(), xilinx_axidma_realize(), xilinx_enet_realize() are wrong that way. When the one appropriate choice of argument is &error_abort, letting users pick the argument is a bad idea. Drop parameter @errp and assert the preconditions instead. There's one exception to "duplicate property name is a programming error": the way object_property_add() implements the magic (and undocumented) "automatic arrayification". Don't drop @errp there. Instead, rename object_property_add() to object_property_try_add(), and add the obvious wrapper object_property_add(). Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200505152926.18877-15-armbru@redhat.com> [Two semantic rebase conflicts resolved]
2020-05-05 18:29:22 +03:00
pc_machine_get_sata, pc_machine_set_sata);
object_class_property_set_description(oc, PC_MACHINE_SATA,
"Enable/disable Serial ATA bus");
object_class_property_add_bool(oc, "hpet",
pc_machine_get_hpet, pc_machine_set_hpet);
object_class_property_set_description(oc, "hpet",
"Enable/disable high precision event timer emulation");
object_class_property_add_bool(oc, PC_MACHINE_I8042,
pc_machine_get_i8042, pc_machine_set_i8042);
object_class_property_add_bool(oc, "default-bus-bypass-iommu",
pc_machine_get_default_bus_bypass_iommu,
pc_machine_set_default_bus_bypass_iommu);
object_class_property_add(oc, PC_MACHINE_MAX_FW_SIZE, "size",
pc_machine_get_max_fw_size, pc_machine_set_max_fw_size,
NULL, NULL);
object_class_property_set_description(oc, PC_MACHINE_MAX_FW_SIZE,
"Maximum combined firmware size");
object_class_property_add(oc, PC_MACHINE_SMBIOS_EP, "str",
pc_machine_get_smbios_ep, pc_machine_set_smbios_ep,
NULL, NULL);
object_class_property_set_description(oc, PC_MACHINE_SMBIOS_EP,
"SMBIOS Entry Point type [32, 64]");
object_class_property_add_bool(oc, "fd-bootchk",
pc_machine_get_fd_bootchk,
pc_machine_set_fd_bootchk);
}
static const TypeInfo pc_machine_info = {
.name = TYPE_PC_MACHINE,
.parent = TYPE_X86_MACHINE,
.abstract = true,
.instance_size = sizeof(PCMachineState),
.instance_init = pc_machine_initfn,
.class_size = sizeof(PCMachineClass),
.class_init = pc_machine_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ }
},
};
static void pc_machine_register_types(void)
{
type_register_static(&pc_machine_info);
}
type_init(pc_machine_register_types)