2007-11-17 20:14:51 +03:00
|
|
|
#ifndef HW_PC_H
|
|
|
|
#define HW_PC_H
|
2009-03-06 02:01:23 +03:00
|
|
|
|
2020-02-28 14:46:49 +03:00
|
|
|
#include "qemu/notify.h"
|
|
|
|
#include "qapi/qapi-types-common.h"
|
2021-02-04 22:39:38 +03:00
|
|
|
#include "qemu/uuid.h"
|
2014-06-10 15:15:17 +04:00
|
|
|
#include "hw/boards.h"
|
2013-02-05 20:06:20 +04:00
|
|
|
#include "hw/block/fdc.h"
|
pc: Support firmware configuration with -blockdev
The PC machines put firmware in ROM by default. To get it put into
flash memory (required by OVMF), you have to use -drive
if=pflash,unit=0,... and optionally -drive if=pflash,unit=1,...
Why two -drive? This permits setting up one part of the flash memory
read-only, and the other part read/write. It also makes upgrading
firmware on the host easier. Below the hood, it creates two separate
flash devices, because we were too lazy to improve our flash device
models to support sector protection.
The problem at hand is to do the same with -blockdev somehow, as one
more step towards deprecating -drive.
Mapping -drive if=none,... to -blockdev is a solved problem. With
if=T other than if=none, -drive additionally configures a block device
frontend. For non-onboard devices, that part maps to -device. Also a
solved problem. For onboard devices such as PC flash memory, we have
an unsolved problem.
This is actually an instance of a wider problem: our general device
configuration interface doesn't cover onboard devices. Instead, we have
a zoo of ad hoc interfaces that are much more limited. One of them is
-drive, which we'd rather deprecate, but can't until we have suitable
replacements for all its uses.
Sadly, I can't attack the wider problem today. So back to the narrow
problem.
My first idea was to reduce it to its solved buddy by using pluggable
instead of onboard devices for the flash memory. Workable, but it
requires some extra smarts in firmware descriptors and libvirt. Paolo
had an idea that is simpler for libvirt: keep the devices onboard, and
add machine properties for their block backends.
The implementation is less than straightforward, I'm afraid.
First, block backend properties are *qdev* properties. Machines can't
have those, as they're not devices. I could duplicate these qdev
properties as QOM properties, but I hate that.
More seriously, the properties do not belong to the machine, they
belong to the onboard flash devices. Adding them to the machine would
then require bad magic to somehow transfer them to the flash devices.
Fortunately, QOM provides the means to handle exactly this case: add
alias properties to the machine that forward to the onboard devices'
properties.
Properties need to be created in .instance_init() methods. For PC
machines, that's pc_machine_initfn(). To make alias properties work,
we need to create the onboard flash devices there, too. Requires
several bug fixes, in the previous commits. We also have to realize
the devices. More on that below.
If the user sets pflash0, firmware resides in flash memory.
pc_system_firmware_init() maps and realizes the flash devices.
Else, firmware resides in ROM. The onboard flash devices aren't used
then. pc_system_firmware_init() destroys them unrealized, along with
the alias properties.
The existing code to pick up drives defined with -drive if=pflash is
replaced by code to desugar into the machine properties.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <87ftrtux81.fsf@dusky.pond.sub.org>
2019-03-11 20:39:26 +03:00
|
|
|
#include "hw/block/flash.h"
|
2019-10-22 10:39:50 +03:00
|
|
|
#include "hw/i386/x86.h"
|
2009-03-06 02:01:23 +03:00
|
|
|
|
2020-02-28 14:46:49 +03:00
|
|
|
#include "hw/hotplug.h"
|
2020-09-03 23:43:22 +03:00
|
|
|
#include "qom/object.h"
|
vl: Add sgx compound properties to expose SGX EPC sections to guest
Because SGX EPC is enumerated through CPUID, EPC "devices" need to be
realized prior to realizing the vCPUs themselves, i.e. long before
generic devices are parsed and realized. From a virtualization
perspective, the CPUID aspect also means that EPC sections cannot be
hotplugged without paravirtualizing the guest kernel (hardware does
not support hotplugging as EPC sections must be locked down during
pre-boot to provide EPC's security properties).
So even though EPC sections could be realized through the generic
-devices command, they need to be created much earlier for them to
actually be usable by the guest. Place all EPC sections in a
contiguous block, somewhat arbitrarily starting after RAM above 4g.
Ensuring EPC is in a contiguous region simplifies calculations, e.g.
device memory base, PCI hole, etc..., allows dynamic calculation of the
total EPC size, e.g. exposing EPC to guests does not require -maxmem,
and last but not least allows all of EPC to be enumerated in a single
ACPI entry, which is expected by some kernels, e.g. Windows 7 and 8.
The new compound properties command for SGX looks like this:
......
-object memory-backend-epc,id=mem1,size=28M,prealloc=on \
-object memory-backend-epc,id=mem2,size=10M \
-M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <20210719112136.57018-6-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-09-28 11:40:58 +03:00
|
|
|
#include "hw/i386/sgx-epc.h"
|
2022-06-08 17:54:39 +03:00
|
|
|
#include "hw/cxl/cxl.h"
|
2013-05-30 13:57:26 +04:00
|
|
|
|
2024-02-20 19:06:13 +03:00
|
|
|
#define MAX_IDE_BUS 2
|
|
|
|
|
2014-06-02 17:25:08 +04:00
|
|
|
/**
|
|
|
|
* PCMachineState:
|
2014-06-02 17:25:24 +04:00
|
|
|
* @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
|
2016-11-16 16:04:41 +03:00
|
|
|
* @boot_cpus: number of present VCPUs
|
2014-06-02 17:25:08 +04:00
|
|
|
*/
|
2020-09-08 18:55:30 +03:00
|
|
|
typedef struct PCMachineState {
|
2014-06-02 17:24:57 +04:00
|
|
|
/*< private >*/
|
2019-10-22 10:39:50 +03:00
|
|
|
X86MachineState parent_obj;
|
2014-06-02 17:25:08 +04:00
|
|
|
|
|
|
|
/*< public >*/
|
2015-12-11 21:42:21 +03:00
|
|
|
|
|
|
|
/* State for other subsystems/APIs: */
|
2015-12-11 21:42:23 +03:00
|
|
|
Notifier machine_done;
|
2014-06-02 17:25:24 +04:00
|
|
|
|
2015-12-11 21:42:21 +03:00
|
|
|
/* Pointers to devices and objects: */
|
2024-02-24 16:58:47 +03:00
|
|
|
PCIBus *pcibus;
|
2016-05-13 04:43:45 +03:00
|
|
|
I2CBus *smbus;
|
pc: Support firmware configuration with -blockdev
The PC machines put firmware in ROM by default. To get it put into
flash memory (required by OVMF), you have to use -drive
if=pflash,unit=0,... and optionally -drive if=pflash,unit=1,...
Why two -drive? This permits setting up one part of the flash memory
read-only, and the other part read/write. It also makes upgrading
firmware on the host easier. Below the hood, it creates two separate
flash devices, because we were too lazy to improve our flash device
models to support sector protection.
The problem at hand is to do the same with -blockdev somehow, as one
more step towards deprecating -drive.
Mapping -drive if=none,... to -blockdev is a solved problem. With
if=T other than if=none, -drive additionally configures a block device
frontend. For non-onboard devices, that part maps to -device. Also a
solved problem. For onboard devices such as PC flash memory, we have
an unsolved problem.
This is actually an instance of a wider problem: our general device
configuration interface doesn't cover onboard devices. Instead, we have
a zoo of ad hoc interfaces that are much more limited. One of them is
-drive, which we'd rather deprecate, but can't until we have suitable
replacements for all its uses.
Sadly, I can't attack the wider problem today. So back to the narrow
problem.
My first idea was to reduce it to its solved buddy by using pluggable
instead of onboard devices for the flash memory. Workable, but it
requires some extra smarts in firmware descriptors and libvirt. Paolo
had an idea that is simpler for libvirt: keep the devices onboard, and
add machine properties for their block backends.
The implementation is less than straightforward, I'm afraid.
First, block backend properties are *qdev* properties. Machines can't
have those, as they're not devices. I could duplicate these qdev
properties as QOM properties, but I hate that.
More seriously, the properties do not belong to the machine, they
belong to the onboard flash devices. Adding them to the machine would
then require bad magic to somehow transfer them to the flash devices.
Fortunately, QOM provides the means to handle exactly this case: add
alias properties to the machine that forward to the onboard devices'
properties.
Properties need to be created in .instance_init() methods. For PC
machines, that's pc_machine_initfn(). To make alias properties work,
we need to create the onboard flash devices there, too. Requires
several bug fixes, in the previous commits. We also have to realize
the devices. More on that below.
If the user sets pflash0, firmware resides in flash memory.
pc_system_firmware_init() maps and realizes the flash devices.
Else, firmware resides in ROM. The onboard flash devices aren't used
then. pc_system_firmware_init() destroys them unrealized, along with
the alias properties.
The existing code to pick up drives defined with -drive if=pflash is
replaced by code to desugar into the machine properties.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <87ftrtux81.fsf@dusky.pond.sub.org>
2019-03-11 20:39:26 +03:00
|
|
|
PFlashCFI01 *flash[2];
|
2020-07-02 16:25:21 +03:00
|
|
|
ISADevice *pcspk;
|
2021-10-26 21:20:22 +03:00
|
|
|
DeviceState *iommu;
|
2024-02-20 19:06:13 +03:00
|
|
|
BusState *idebus[MAX_IDE_BUS];
|
2014-06-20 05:40:25 +04:00
|
|
|
|
2015-12-11 21:42:21 +03:00
|
|
|
/* Configuration options: */
|
2020-05-29 10:39:56 +03:00
|
|
|
uint64_t max_ram_below_4g;
|
2014-11-21 19:18:52 +03:00
|
|
|
OnOffAuto vmport;
|
2021-10-26 18:11:00 +03:00
|
|
|
SmbiosEntryPointType smbios_entry_point_type;
|
2023-10-07 15:38:37 +03:00
|
|
|
const char *south_bridge;
|
2016-03-04 19:00:32 +03:00
|
|
|
|
2016-11-01 20:44:16 +03:00
|
|
|
bool acpi_build_enabled;
|
2018-11-07 18:24:34 +03:00
|
|
|
bool smbus_enabled;
|
|
|
|
bool sata_enabled;
|
2020-10-21 17:47:16 +03:00
|
|
|
bool hpet_enabled;
|
2022-02-28 00:06:55 +03:00
|
|
|
bool i8042_enabled;
|
2021-07-08 15:55:14 +03:00
|
|
|
bool default_bus_bypass_iommu;
|
2024-02-13 12:56:56 +03:00
|
|
|
bool fd_bootchk;
|
2020-12-08 18:53:38 +03:00
|
|
|
uint64_t max_fw_size;
|
2016-11-01 20:44:16 +03:00
|
|
|
|
2019-09-18 16:06:23 +03:00
|
|
|
/* ACPI Memory hotplug IO base address */
|
|
|
|
hwaddr memhp_io_base;
|
vl: Add sgx compound properties to expose SGX EPC sections to guest
Because SGX EPC is enumerated through CPUID, EPC "devices" need to be
realized prior to realizing the vCPUs themselves, i.e. long before
generic devices are parsed and realized. From a virtualization
perspective, the CPUID aspect also means that EPC sections cannot be
hotplugged without paravirtualizing the guest kernel (hardware does
not support hotplugging as EPC sections must be locked down during
pre-boot to provide EPC's security properties).
So even though EPC sections could be realized through the generic
-devices command, they need to be created much earlier for them to
actually be usable by the guest. Place all EPC sections in a
contiguous block, somewhat arbitrarily starting after RAM above 4g.
Ensuring EPC is in a contiguous region simplifies calculations, e.g.
device memory base, PCI hole, etc..., allows dynamic calculation of the
total EPC size, e.g. exposing EPC to guests does not require -maxmem,
and last but not least allows all of EPC to be enumerated in a single
ACPI entry, which is expected by some kernels, e.g. Windows 7 and 8.
The new compound properties command for SGX looks like this:
......
-object memory-backend-epc,id=mem1,size=28M,prealloc=on \
-object memory-backend-epc,id=mem2,size=10M \
-M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <20210719112136.57018-6-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-09-28 11:40:58 +03:00
|
|
|
|
|
|
|
SGXEPCState sgx_epc;
|
2022-06-08 17:54:39 +03:00
|
|
|
CXLState cxl_devices_state;
|
2020-09-08 18:55:30 +03:00
|
|
|
} PCMachineState;
|
2014-06-02 17:24:57 +04:00
|
|
|
|
2014-06-02 17:25:24 +04:00
|
|
|
#define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
|
2020-05-29 10:39:56 +03:00
|
|
|
#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
|
2014-10-04 01:33:37 +04:00
|
|
|
#define PC_MACHINE_VMPORT "vmport"
|
2016-11-05 10:19:48 +03:00
|
|
|
#define PC_MACHINE_SMBUS "smbus"
|
2016-11-05 10:19:49 +03:00
|
|
|
#define PC_MACHINE_SATA "sata"
|
2022-02-28 00:06:55 +03:00
|
|
|
#define PC_MACHINE_I8042 "i8042"
|
2020-12-08 18:53:38 +03:00
|
|
|
#define PC_MACHINE_MAX_FW_SIZE "max-fw-size"
|
2021-10-26 18:11:00 +03:00
|
|
|
#define PC_MACHINE_SMBIOS_EP "smbios-entry-point-type"
|
|
|
|
|
2014-06-02 17:25:12 +04:00
|
|
|
/**
|
|
|
|
* PCMachineClass:
|
2015-12-11 21:42:21 +03:00
|
|
|
*
|
|
|
|
* Compat fields:
|
|
|
|
*
|
2015-12-02 01:58:06 +03:00
|
|
|
* @enforce_aligned_dimm: check that DIMM's address/size is aligned by
|
|
|
|
* backend's alignment value if provided
|
2015-12-11 21:42:21 +03:00
|
|
|
* @acpi_data_size: Size of the chunk of memory at the top of RAM
|
|
|
|
* for the BIOS ACPI tables and other BIOS
|
|
|
|
* data structures.
|
|
|
|
* @gigabyte_align: Make sure that guest addresses aligned at
|
|
|
|
* 1Gbyte boundaries get mapped to host
|
|
|
|
* addresses aligned at 1Gbyte boundaries. This
|
|
|
|
* way we can use 1GByte pages in the host.
|
|
|
|
*
|
2014-06-02 17:25:12 +04:00
|
|
|
*/
|
2020-09-03 23:43:22 +03:00
|
|
|
struct PCMachineClass {
|
2014-06-02 17:24:57 +04:00
|
|
|
/*< private >*/
|
2019-10-22 10:39:50 +03:00
|
|
|
X86MachineClass parent_class;
|
2014-06-02 17:25:12 +04:00
|
|
|
|
|
|
|
/*< public >*/
|
2015-12-11 21:42:21 +03:00
|
|
|
|
|
|
|
/* Device configuration: */
|
2015-12-02 01:58:03 +03:00
|
|
|
bool pci_enabled;
|
2023-10-07 15:38:37 +03:00
|
|
|
const char *default_south_bridge;
|
2015-12-11 21:42:21 +03:00
|
|
|
|
|
|
|
/* Compat options: */
|
|
|
|
|
2019-06-28 03:28:42 +03:00
|
|
|
/* Default CPU model version. See x86_cpu_set_default_version(). */
|
|
|
|
int default_cpu_version;
|
|
|
|
|
2015-12-11 21:42:21 +03:00
|
|
|
/* ACPI compat: */
|
2015-12-02 01:58:03 +03:00
|
|
|
bool has_acpi_build;
|
|
|
|
bool rsdp_in_ram;
|
2015-12-11 21:42:21 +03:00
|
|
|
int legacy_acpi_table_size;
|
|
|
|
unsigned acpi_data_size;
|
2021-03-01 22:59:18 +03:00
|
|
|
int pci_root_uid;
|
2015-12-11 21:42:21 +03:00
|
|
|
|
|
|
|
/* SMBIOS compat: */
|
2015-12-02 01:58:03 +03:00
|
|
|
bool smbios_defaults;
|
|
|
|
bool smbios_legacy_mode;
|
|
|
|
bool smbios_uuid_encoded;
|
2023-06-07 23:57:16 +03:00
|
|
|
SmbiosEntryPointType default_smbios_ep_type;
|
2015-12-11 21:42:21 +03:00
|
|
|
|
|
|
|
/* RAM / address space compat: */
|
2015-12-02 01:58:03 +03:00
|
|
|
bool gigabyte_align;
|
|
|
|
bool has_reserved_memory;
|
2015-12-02 01:58:06 +03:00
|
|
|
bool enforce_aligned_dimm;
|
2015-12-11 21:42:21 +03:00
|
|
|
bool broken_reserved_end;
|
2022-07-19 20:00:14 +03:00
|
|
|
bool enforce_amd_1tb_hole;
|
hw/i386/pc_sysfw: Alias rather than copy isa-bios region
In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped
to the top of the 4G memory boundary. Do the same in the -pflash case, but only
for new machine versions for migration compatibility. This establishes common
behavior and makes pflash commands work in the "isa-bios" region which some
real-world legacy bioses rely on.
Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash
case will now also point to encrypted memory, just like it already does in the
-bios case.
When running `info mtree` before and after this commit with
`qemu-system-x86_64 -S -drive \
if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running
`diff -u before.mtree after.mtree` results in the following changes in the
memory tree:
--- before.mtree
+++ after.mtree
@@ -71,7 +71,7 @@
0000000000000000-ffffffffffffffff (prio -1, i/o): pci
00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
- 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
+ 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff
00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff
00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff
@@ -108,7 +108,7 @@
0000000000000000-ffffffffffffffff (prio -1, i/o): pci
00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
- 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
+ 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff
00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff
00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff
@@ -131,11 +131,14 @@
memory-region: pc.ram
0000000000000000-0000000007ffffff (prio 0, ram): pc.ram
+memory-region: system.flash0
+ 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0
+
memory-region: pci
0000000000000000-ffffffffffffffff (prio -1, i/o): pci
00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
- 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
+ 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
memory-region: smram
00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff
Note that in both cases the "system" memory region contains the entry
00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0
but the "system.flash0" memory region only appears standalone when "isa-bios" is
an alias.
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-ID: <20240508175507.22270-7-shentey@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2024-05-08 20:55:07 +03:00
|
|
|
bool isa_bios_alias;
|
2015-11-24 06:33:57 +03:00
|
|
|
|
2016-06-15 12:25:23 +03:00
|
|
|
/* generate legacy CPU hotplug AML */
|
|
|
|
bool legacy_cpu_hotplug;
|
2017-04-25 18:37:50 +03:00
|
|
|
|
2019-01-22 15:10:48 +03:00
|
|
|
/* use PVH to load kernels that support this feature */
|
|
|
|
bool pvh_enabled;
|
2020-09-22 18:19:34 +03:00
|
|
|
|
|
|
|
/* create kvmclock device even when KVM PV features are not exposed */
|
|
|
|
bool kvmclock_create_always;
|
2023-03-29 07:57:26 +03:00
|
|
|
|
|
|
|
/* resizable acpi blob compat */
|
|
|
|
bool resizable_acpi_blob;
|
hw/i386/pc: improve physical address space bound check for 32-bit x86 systems
32-bit x86 systems do not have a reserved memory for hole64. On those 32-bit
systems without PSE36 or PAE CPU features, hotplugging memory devices is not
supported by QEMU as QEMU always places hotplugged memory above 4 GiB boundary
which is beyond the physical address space of the processor. Linux guests also
do not support memory hotplug on those systems. Please see Linux
kernel commit b59d02ed08690 ("mm/memory_hotplug: disable the functionality
for 32b") for more details.
Therefore, the maximum limit of the guest physical address in the absence of
additional memory devices effectively coincides with the end of
"above 4G memory space" region for 32-bit x86 without PAE/PSE36. When users
configure additional memory devices, after properly accounting for the
additional device memory region to find the maximum value of the guest
physical address, the address will be outside the range of the processor's
physical address space.
This change adds improvements to take above into consideration.
For example, previously this was allowed:
$ ./qemu-system-x86_64 -cpu pentium -m size=10G
With this change now it is no longer allowed:
$ ./qemu-system-x86_64 -cpu pentium -m size=10G
qemu-system-x86_64: Address space limit 0xffffffff < 0x2bfffffff phys-bits too low (32)
However, the following are allowed since on both cases physical address
space of the processor is 36 bits:
$ ./qemu-system-x86_64 -cpu pentium2 -m size=10G
$ ./qemu-system-x86_64 -cpu pentium,pse36=on -m size=10G
For 32-bit, without PAE/PSE36, hotplugging additional memory is no longer allowed.
$ ./qemu-system-i386 -m size=1G,maxmem=3G,slots=2
qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32)
$ ./qemu-system-i386 -machine q35 -m size=1G,maxmem=3G,slots=2
qemu-system-i386: Address space limit 0xffffffff < 0x1ffffffff phys-bits too low (32)
A new compatibility flag is introduced to make sure pc_max_used_gpa() keeps
returning the old value for machines 8.1 and older.
Therefore, the above is still allowed for older machine types in order to support
compatibility. Hence, the following still works:
$ ./qemu-system-i386 -machine pc-i440fx-8.1 -m size=1G,maxmem=3G,slots=2
$ ./qemu-system-i386 -machine pc-q35-8.1 -m size=1G,maxmem=3G,slots=2
Further, following is also allowed as with PSE36, the processor has 36-bit
address space:
$ ./qemu-system-i386 -cpu 486,pse36=on -m size=1G,maxmem=3G,slots=2
After calling CPUID with EAX=0x80000001, all AMD64 compliant processors
have the longmode-capable-bit turned on in the extended feature flags (bit 29)
in EDX. The absence of CPUID longmode can be used to differentiate between
32-bit and 64-bit processors and is the recommended approach. QEMU takes this
approach elsewhere (for example, please see x86_cpu_realizefn()). With
this change, pc_max_used_gpa() also uses the same method to detect 32-bit
processors.
Unit tests are modified to not run 32-bit x86 tests that use memory hotplug.
Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Message-Id: <20230922160413.165702-1-anisinha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-09-22 19:04:13 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* whether the machine type implements broken 32-bit address space bound
|
|
|
|
* check for memory.
|
|
|
|
*/
|
|
|
|
bool broken_32bit_mem_addr_check;
|
2020-09-03 23:43:22 +03:00
|
|
|
};
|
2014-06-02 17:24:57 +04:00
|
|
|
|
|
|
|
#define TYPE_PC_MACHINE "generic-pc-machine"
|
2020-09-16 21:25:18 +03:00
|
|
|
OBJECT_DECLARE_TYPE(PCMachineState, PCMachineClass, PC_MACHINE)
|
2014-06-02 17:24:57 +04:00
|
|
|
|
2015-09-22 16:18:20 +03:00
|
|
|
/* ioapic.c */
|
|
|
|
|
2019-10-18 16:59:06 +03:00
|
|
|
GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled);
|
|
|
|
|
2007-11-17 20:14:51 +03:00
|
|
|
/* pc.c */
|
|
|
|
|
2010-05-14 11:29:15 +04:00
|
|
|
void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
|
|
|
|
|
2023-06-30 10:37:09 +03:00
|
|
|
#define PCI_HOST_PROP_RAM_MEM "ram-mem"
|
|
|
|
#define PCI_HOST_PROP_PCI_MEM "pci-mem"
|
|
|
|
#define PCI_HOST_PROP_SYSTEM_MEM "system-mem"
|
|
|
|
#define PCI_HOST_PROP_IO_MEM "io-mem"
|
2013-07-29 18:47:57 +04:00
|
|
|
#define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start"
|
|
|
|
#define PCI_HOST_PROP_PCI_HOLE_END "pci-hole-end"
|
|
|
|
#define PCI_HOST_PROP_PCI_HOLE64_START "pci-hole64-start"
|
|
|
|
#define PCI_HOST_PROP_PCI_HOLE64_END "pci-hole64-end"
|
|
|
|
#define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size"
|
2016-06-22 15:24:49 +03:00
|
|
|
#define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size"
|
|
|
|
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
|
2024-03-20 11:39:14 +03:00
|
|
|
#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
|
2013-08-27 09:37:26 +04:00
|
|
|
|
2024-05-30 14:16:30 +03:00
|
|
|
/*
 * Section type tag stored in the 'type' member of OvmfSevMetadataDesc.
 * No enumerator has an explicit value, so they are numbered 0, 1, 2, 3
 * in declaration order.
 */
typedef enum {
|
|
|
|
/* First enumerator (value 0); presumably means "no type set" -- confirm. */
SEV_DESC_TYPE_UNDEF,
|
|
|
|
/* The section contains the region that must be validated by the VMM. */
|
|
|
|
SEV_DESC_TYPE_SNP_SEC_MEM,
|
|
|
|
/* The section contains the SNP secrets page. */
|
|
|
|
SEV_DESC_TYPE_SNP_SECRETS,
|
|
|
|
/* The section contains an address that can be used as a CPUID page. */
|
|
|
|
SEV_DESC_TYPE_CPUID,
|
|
|
|
|
|
|
|
} ovmf_sev_metadata_desc_type;
|
|
|
|
|
|
|
|
/*
 * One descriptor entry; OvmfSevMetadata carries an array of these in its
 * trailing descs[] member.  The struct is packed, so the members are laid
 * out back to back with no padding.
 */
typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc {
|
|
|
|
/* Presumably the start address of the described section -- confirm. */
uint32_t base;
|
|
|
|
/* Presumably the length of the described section in bytes -- confirm. */
uint32_t len;
|
|
|
|
/*
 * Section type tag.
 * NOTE(review): the storage size of an enum type is implementation-defined,
 * so the size of this packed struct depends on the compiler's choice for
 * ovmf_sev_metadata_desc_type -- confirm it matches the width the firmware
 * uses for this field.
 */
ovmf_sev_metadata_desc_type type;
|
|
|
|
} OvmfSevMetadataDesc;
|
|
|
|
|
|
|
|
/*
 * Packed header followed by a variable number of OvmfSevMetadataDesc
 * entries.  descs[] is a flexible array member, so sizeof(OvmfSevMetadata)
 * covers only the fixed header fields.
 */
typedef struct __attribute__((__packed__)) OvmfSevMetadata {
|
|
|
|
/* Four-byte signature; the expected value is not visible here. */
uint8_t signature[4];
|
|
|
|
/* Presumably the total length of the metadata in bytes -- confirm. */
uint32_t len;
|
|
|
|
/* Metadata format version. */
uint32_t version;
|
|
|
|
/* Presumably the number of entries in descs[] -- confirm against users. */
uint32_t num_desc;
|
|
|
|
/* Descriptor entries that immediately follow the header. */
OvmfSevMetadataDesc descs[];
|
|
|
|
} OvmfSevMetadata;
|
|
|
|
|
|
|
|
OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void);
|
2013-07-29 18:47:57 +04:00
|
|
|
|
2023-01-05 20:38:26 +03:00
|
|
|
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
|
2013-10-29 16:57:34 +04:00
|
|
|
MemoryRegion *pci_address_space);
|
2013-07-29 18:47:57 +04:00
|
|
|
|
2015-12-11 21:42:25 +03:00
|
|
|
void xen_load_linux(PCMachineState *pcms);
|
2015-12-11 21:42:24 +03:00
|
|
|
void pc_memory_init(PCMachineState *pcms,
|
|
|
|
MemoryRegion *system_memory,
|
|
|
|
MemoryRegion *rom_memory,
|
2022-07-19 20:00:06 +03:00
|
|
|
uint64_t pci_hole64_size);
|
2017-11-11 18:25:00 +03:00
|
|
|
uint64_t pc_pci_hole64_start(void);
|
2011-12-16 01:09:51 +04:00
|
|
|
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
|
2020-07-02 16:25:16 +03:00
|
|
|
void pc_basic_device_init(struct PCMachineState *pcms,
|
|
|
|
ISABus *isa_bus, qemu_irq *gsi,
|
2023-05-19 11:47:34 +03:00
|
|
|
ISADevice *rtc_state,
|
2015-05-28 23:04:08 +03:00
|
|
|
bool create_fdctrl,
|
2016-01-22 18:09:21 +03:00
|
|
|
uint32_t hpet_irqs);
|
2023-10-17 19:53:58 +03:00
|
|
|
void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus);
|
2010-05-14 11:29:04 +04:00
|
|
|
|
2019-10-18 16:59:09 +03:00
|
|
|
void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs);
|
2012-11-15 00:54:01 +04:00
|
|
|
|
2019-12-13 13:51:00 +03:00
|
|
|
/* port92.c */
|
2016-06-22 15:24:52 +03:00
|
|
|
#define PORT92_A20_LINE "a20"
|
|
|
|
|
2019-12-13 13:51:00 +03:00
|
|
|
#define TYPE_PORT92 "port92"
|
|
|
|
|
2012-02-22 11:18:51 +04:00
|
|
|
/* pc_sysfw.c */
|
2024-02-27 00:59:09 +03:00
|
|
|
void pc_system_flash_create(PCMachineState *pcms);
|
|
|
|
void pc_system_flash_cleanup_unused(PCMachineState *pcms);
|
2019-03-08 16:14:43 +03:00
|
|
|
void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
|
2021-02-04 22:39:38 +03:00
|
|
|
bool pc_system_ovmf_table_find(const char *entry, uint8_t **data,
|
|
|
|
int *data_len);
|
2021-05-21 16:34:07 +03:00
|
|
|
void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size);
|
2021-02-04 22:39:38 +03:00
|
|
|
|
2021-07-19 14:21:15 +03:00
|
|
|
/* sgx.c */
|
|
|
|
void pc_machine_init_sgx_epc(PCMachineState *pcms);
|
|
|
|
|
2024-03-25 17:01:51 +03:00
|
|
|
extern GlobalProperty pc_compat_9_0[];
|
|
|
|
extern const size_t pc_compat_9_0_len;
|
|
|
|
|
2023-11-20 12:42:59 +03:00
|
|
|
extern GlobalProperty pc_compat_8_2[];
|
|
|
|
extern const size_t pc_compat_8_2_len;
|
|
|
|
|
2023-07-18 17:22:35 +03:00
|
|
|
extern GlobalProperty pc_compat_8_1[];
|
|
|
|
extern const size_t pc_compat_8_1_len;
|
|
|
|
|
2023-03-14 20:30:09 +03:00
|
|
|
extern GlobalProperty pc_compat_8_0[];
|
|
|
|
extern const size_t pc_compat_8_0_len;
|
|
|
|
|
2022-12-12 18:21:44 +03:00
|
|
|
extern GlobalProperty pc_compat_7_2[];
|
|
|
|
extern const size_t pc_compat_7_2_len;
|
|
|
|
|
2022-07-27 15:17:55 +03:00
|
|
|
extern GlobalProperty pc_compat_7_1[];
|
|
|
|
extern const size_t pc_compat_7_1_len;
|
|
|
|
|
2022-03-16 17:55:21 +03:00
|
|
|
extern GlobalProperty pc_compat_7_0[];
|
|
|
|
extern const size_t pc_compat_7_0_len;
|
|
|
|
|
2021-12-17 17:39:48 +03:00
|
|
|
extern GlobalProperty pc_compat_6_2[];
|
|
|
|
extern const size_t pc_compat_6_2_len;
|
|
|
|
|
2021-08-31 04:54:26 +03:00
|
|
|
extern GlobalProperty pc_compat_6_1[];
|
|
|
|
extern const size_t pc_compat_6_1_len;
|
|
|
|
|
2021-03-31 14:19:00 +03:00
|
|
|
extern GlobalProperty pc_compat_6_0[];
|
|
|
|
extern const size_t pc_compat_6_0_len;
|
|
|
|
|
2020-11-09 20:39:28 +03:00
|
|
|
extern GlobalProperty pc_compat_5_2[];
|
|
|
|
extern const size_t pc_compat_5_2_len;
|
|
|
|
|
2020-08-19 17:40:16 +03:00
|
|
|
extern GlobalProperty pc_compat_5_1[];
|
|
|
|
extern const size_t pc_compat_5_1_len;
|
|
|
|
|
2020-04-29 17:46:05 +03:00
|
|
|
extern GlobalProperty pc_compat_5_0[];
|
|
|
|
extern const size_t pc_compat_5_0_len;
|
|
|
|
|
2019-11-12 13:48:11 +03:00
|
|
|
extern GlobalProperty pc_compat_4_2[];
|
|
|
|
extern const size_t pc_compat_4_2_len;
|
|
|
|
|
2019-07-24 13:35:24 +03:00
|
|
|
extern GlobalProperty pc_compat_4_1[];
|
|
|
|
extern const size_t pc_compat_4_1_len;
|
|
|
|
|
2019-04-11 13:20:25 +03:00
|
|
|
extern GlobalProperty pc_compat_4_0[];
|
|
|
|
extern const size_t pc_compat_4_0_len;
|
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_3_1[];
|
|
|
|
extern const size_t pc_compat_3_1_len;
|
2018-12-04 19:27:16 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_3_0[];
|
|
|
|
extern const size_t pc_compat_3_0_len;
|
2018-09-21 11:22:10 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_12[];
|
|
|
|
extern const size_t pc_compat_2_12_len;
|
2018-05-14 19:41:50 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_11[];
|
|
|
|
extern const size_t pc_compat_2_11_len;
|
2017-12-19 06:37:29 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_10[];
|
|
|
|
extern const size_t pc_compat_2_10_len;
|
2017-09-06 17:26:57 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_9[];
|
|
|
|
extern const size_t pc_compat_2_9_len;
|
2017-04-25 12:49:13 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_8[];
|
|
|
|
extern const size_t pc_compat_2_8_len;
|
2017-01-18 20:53:45 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_7[];
|
|
|
|
extern const size_t pc_compat_2_7_len;
|
target-i386: present virtual L3 cache info for vcpus
Some software algorithms are based on the hardware's cache info. For example,
in the x86 Linux kernel, when cpu1 wants to wake up a task on cpu2, cpu1 will trigger
a resched IPI and tell cpu2 to do the wakeup if they don't share a last-level
cache. Conversely, cpu1 will access cpu2's runqueue directly if they share the LLC.
The relevant linux-kernel code is as below:
static void ttwu_queue(struct task_struct *p, int cpu)
{
struct rq *rq = cpu_rq(cpu);
......
if (... && !cpus_share_cache(smp_processor_id(), cpu)) {
......
ttwu_queue_remote(p, cpu); /* will trigger RES IPI */
return;
}
......
ttwu_do_activate(rq, p, 0); /* access target's rq directly */
......
}
In real hardware, the cpus on the same socket share L3 cache, so one won't
trigger a resched IPIs when wakeup a task on others. But QEMU doesn't present a
virtual L3 cache info for VM, then the linux guest will trigger lots of RES IPIs
under some workloads even if the virtual cpus belongs to the same virtual socket.
For KVM, there will be lots of vmexit due to guest send IPIs.
The workload is SAP HANA's test suite; we ran it for one round (about 40 minutes)
and observed the number of RES IPIs triggered in the (Suse11sp3) guest during
the period:
No-L3 With-L3(applied this patch)
cpu0: 363890 44582
cpu1: 373405 43109
cpu2: 340783 43797
cpu3: 333854 43409
cpu4: 327170 40038
cpu5: 325491 39922
cpu6: 319129 42391
cpu7: 306480 41035
cpu8: 161139 32188
cpu9: 164649 31024
cpu10: 149823 30398
cpu11: 149823 32455
cpu12: 164830 35143
cpu13: 172269 35805
cpu14: 179979 33898
cpu15: 194505 32754
avg: 268963.6 40129.8
The VM's topology is "1*socket 8*cores 2*threads".
After present virtual L3 cache info for VM, the amounts of RES IPIs in guest
reduce 85%.
For KVM, vcpus send IPIs will cause vmexit which is expensive, so it can cause
severe performance degradation. We had tested the overall system performance if
vcpus actually run on separate physical sockets. With L3 cache, the performance
improves 7.2%~33.1%(avg:15.7%).
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-07 08:21:13 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_6[];
|
|
|
|
extern const size_t pc_compat_2_6_len;
|
2016-05-17 17:43:10 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_5[];
|
|
|
|
extern const size_t pc_compat_2_5_len;
|
2015-11-30 17:56:36 +03:00
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_4[];
|
|
|
|
extern const size_t pc_compat_2_4_len;
|
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_3[];
|
|
|
|
extern const size_t pc_compat_2_3_len;
|
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_2[];
|
|
|
|
extern const size_t pc_compat_2_2_len;
|
|
|
|
|
2018-12-12 18:36:30 +03:00
|
|
|
extern GlobalProperty pc_compat_2_1[];
|
|
|
|
extern const size_t pc_compat_2_1_len;
|
|
|
|
|
2018-12-12 20:57:53 +03:00
|
|
|
extern GlobalProperty pc_compat_2_0[];
|
|
|
|
extern const size_t pc_compat_2_0_len;
|
|
|
|
|
2015-05-15 20:18:55 +03:00
|
|
|
/*
 * DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn)
 *
 * Expands to the boilerplate that defines and registers one PC machine type:
 *
 *  - pc_machine_<suffix>_class_init(): casts the class to MachineClass,
 *    calls optsfn(mc), then sets mc->init = initfn (so initfn is assigned
 *    after optsfn has run).
 *  - pc_machine_type_<suffix>: a TypeInfo whose name is namestr concatenated
 *    with TYPE_MACHINE_SUFFIX, whose parent is TYPE_PC_MACHINE and whose
 *    class_init is the function above.
 *  - pc_machine_init_<suffix>(): calls type_register() on that TypeInfo and
 *    is itself passed to type_init().
 *
 * @suffix:  identifier fragment pasted into the generated names
 * @namestr: string literal; it is pasted next to TYPE_MACHINE_SUFFIX, so it
 *           must be a literal for the concatenation to work
 * @initfn:  value stored in mc->init
 * @optsfn:  callable invoked as optsfn(mc) with the MachineClass pointer
 *
 * The expansion ends without a semicolon after type_init(...).
 */
#define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \
|
2015-05-15 20:18:56 +03:00
|
|
|
static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
|
|
|
|
{ \
|
|
|
|
MachineClass *mc = MACHINE_CLASS(oc); \
|
|
|
|
optsfn(mc); \
|
|
|
|
mc->init = initfn; \
|
|
|
|
} \
|
|
|
|
static const TypeInfo pc_machine_type_##suffix = { \
|
|
|
|
.name = namestr TYPE_MACHINE_SUFFIX, \
|
|
|
|
.parent = TYPE_PC_MACHINE, \
|
|
|
|
.class_init = pc_machine_##suffix##_class_init, \
|
|
|
|
}; \
|
2015-05-15 20:18:53 +03:00
|
|
|
static void pc_machine_init_##suffix(void) \
|
|
|
|
{ \
|
2015-05-15 20:18:56 +03:00
|
|
|
type_register(&pc_machine_type_##suffix); \
|
2015-05-15 20:18:53 +03:00
|
|
|
} \
|
2016-02-16 23:59:04 +03:00
|
|
|
type_init(pc_machine_init_##suffix)
|
2015-05-15 20:18:53 +03:00
|
|
|
|
2007-11-17 20:14:51 +03:00
|
|
|
#endif
|