qemu/include/hw/i386/pc.h

334 lines
9.5 KiB
C
Raw Normal View History

#ifndef HW_PC_H
#define HW_PC_H
#include "exec/memory.h"
#include "hw/boards.h"
#include "hw/isa/isa.h"
#include "hw/block/fdc.h"
pc: Support firmware configuration with -blockdev The PC machines put firmware in ROM by default. To get it put into flash memory (required by OVMF), you have to use -drive if=pflash,unit=0,... and optionally -drive if=pflash,unit=1,... Why two -drive? This permits setting up one part of the flash memory read-only, and the other part read/write. It also makes upgrading firmware on the host easier. Below the hood, it creates two separate flash devices, because we were too lazy to improve our flash device models to support sector protection. The problem at hand is to do the same with -blockdev somehow, as one more step towards deprecating -drive. Mapping -drive if=none,... to -blockdev is a solved problem. With if=T other than if=none, -drive additionally configures a block device frontend. For non-onboard devices, that part maps to -device. Also a solved problem. For onboard devices such as PC flash memory, we have an unsolved problem. This is actually an instance of a wider problem: our general device configuration interface doesn't cover onboard devices. Instead, we have a zoo of ad hoc interfaces that are much more limited. One of them is -drive, which we'd rather deprecate, but can't until we have suitable replacements for all its uses. Sadly, I can't attack the wider problem today. So back to the narrow problem. My first idea was to reduce it to its solved buddy by using pluggable instead of onboard devices for the flash memory. Workable, but it requires some extra smarts in firmware descriptors and libvirt. Paolo had an idea that is simpler for libvirt: keep the devices onboard, and add machine properties for their block backends. The implementation is less than straightforward, I'm afraid. First, block backend properties are *qdev* properties. Machines can't have those, as they're not devices. I could duplicate these qdev properties as QOM properties, but I hate that. More seriously, the properties do not belong to the machine, they belong to the onboard flash devices. Adding them to the machine would then require bad magic to somehow transfer them to the flash devices. Fortunately, QOM provides the means to handle exactly this case: add alias properties to the machine that forward to the onboard devices' properties. Properties need to be created in .instance_init() methods. For PC machines, that's pc_machine_initfn(). To make alias properties work, we need to create the onboard flash devices there, too. Requires several bug fixes, in the previous commits. We also have to realize the devices. More on that below. If the user sets pflash0, firmware resides in flash memory. pc_system_firmware_init() maps and realizes the flash devices. Else, firmware resides in ROM. The onboard flash devices aren't used then. pc_system_firmware_init() destroys them unrealized, along with the alias properties. The existing code to pick up drives defined with -drive if=pflash is replaced by code to desugar into the machine properties. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Acked-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Message-Id: <87ftrtux81.fsf@dusky.pond.sub.org>
2019-03-11 20:39:26 +03:00
#include "hw/block/flash.h"
#include "net/net.h"
#include "hw/i386/ioapic.h"
#include "hw/i386/x86.h"
#include "qemu/range.h"
#include "qemu/bitmap.h"
#include "qemu/module.h"
#include "hw/pci/pci.h"
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
#include "hw/acpi/acpi_dev_interface.h"
#define HPET_INTCAP "hpet-intcap"
/**
* PCMachineState:
* @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
* @boot_cpus: number of present VCPUs
* @smp_dies: number of dies per one package
*/
struct PCMachineState {
/*< private >*/
X86MachineState parent_obj;
/* <public> */
/* State for other subsystems/APIs: */
Notifier machine_done;
/* Pointers to devices and objects: */
HotplugHandler *acpi_dev;
PCIBus *bus;
I2CBus *smbus;
pc: Support firmware configuration with -blockdev The PC machines put firmware in ROM by default. To get it put into flash memory (required by OVMF), you have to use -drive if=pflash,unit=0,... and optionally -drive if=pflash,unit=1,... Why two -drive? This permits setting up one part of the flash memory read-only, and the other part read/write. It also makes upgrading firmware on the host easier. Below the hood, it creates two separate flash devices, because we were too lazy to improve our flash device models to support sector protection. The problem at hand is to do the same with -blockdev somehow, as one more step towards deprecating -drive. Mapping -drive if=none,... to -blockdev is a solved problem. With if=T other than if=none, -drive additionally configures a block device frontend. For non-onboard devices, that part maps to -device. Also a solved problem. For onboard devices such as PC flash memory, we have an unsolved problem. This is actually an instance of a wider problem: our general device configuration interface doesn't cover onboard devices. Instead, we have a zoo of ad hoc interfaces that are much more limited. One of them is -drive, which we'd rather deprecate, but can't until we have suitable replacements for all its uses. Sadly, I can't attack the wider problem today. So back to the narrow problem. My first idea was to reduce it to its solved buddy by using pluggable instead of onboard devices for the flash memory. Workable, but it requires some extra smarts in firmware descriptors and libvirt. Paolo had an idea that is simpler for libvirt: keep the devices onboard, and add machine properties for their block backends. The implementation is less than straightforward, I'm afraid. First, block backend properties are *qdev* properties. Machines can't have those, as they're not devices. I could duplicate these qdev properties as QOM properties, but I hate that. More seriously, the properties do not belong to the machine, they belong to the onboard flash devices. Adding them to the machine would then require bad magic to somehow transfer them to the flash devices. Fortunately, QOM provides the means to handle exactly this case: add alias properties to the machine that forward to the onboard devices' properties. Properties need to be created in .instance_init() methods. For PC machines, that's pc_machine_initfn(). To make alias properties work, we need to create the onboard flash devices there, too. Requires several bug fixes, in the previous commits. We also have to realize the devices. More on that below. If the user sets pflash0, firmware resides in flash memory. pc_system_firmware_init() maps and realizes the flash devices. Else, firmware resides in ROM. The onboard flash devices aren't used then. pc_system_firmware_init() destroys them unrealized, along with the alias properties. The existing code to pick up drives defined with -drive if=pflash is replaced by code to desugar into the machine properties. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Acked-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Message-Id: <87ftrtux81.fsf@dusky.pond.sub.org>
2019-03-11 20:39:26 +03:00
PFlashCFI01 *flash[2];
/* Configuration options: */
OnOffAuto vmport;
OnOffAuto smm;
bool acpi_build_enabled;
bool smbus_enabled;
bool sata_enabled;
bool pit_enabled;
/* NUMA information: */
uint64_t numa_nodes;
uint64_t *node_mem;
/* ACPI Memory hotplug IO base address */
hwaddr memhp_io_base;
};
#define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
#define PC_MACHINE_DEVMEM_REGION_SIZE "device-memory-region-size"
#define PC_MACHINE_VMPORT "vmport"
#define PC_MACHINE_SMM "smm"
#define PC_MACHINE_SMBUS "smbus"
#define PC_MACHINE_SATA "sata"
#define PC_MACHINE_PIT "pit"
/**
* PCMachineClass:
*
* Compat fields:
*
* @enforce_aligned_dimm: check that DIMM's address/size is aligned by
* backend's alignment value if provided
* @acpi_data_size: Size of the chunk of memory at the top of RAM
* for the BIOS ACPI tables and other BIOS
* datastructures.
* @gigabyte_align: Make sure that guest addresses aligned at
* 1Gbyte boundaries get mapped to host
* addresses aligned at 1Gbyte boundaries. This
* way we can use 1GByte pages in the host.
*
*/
typedef struct PCMachineClass {
/*< private >*/
X86MachineClass parent_class;
/*< public >*/
/* Device configuration: */
bool pci_enabled;
bool kvmclock_enabled;
const char *default_nic_model;
/* Compat options: */
/* Default CPU model version. See x86_cpu_set_default_version(). */
int default_cpu_version;
/* ACPI compat: */
bool has_acpi_build;
bool rsdp_in_ram;
int legacy_acpi_table_size;
unsigned acpi_data_size;
bool do_not_add_smb_acpi;
/* SMBIOS compat: */
bool smbios_defaults;
bool smbios_legacy_mode;
bool smbios_uuid_encoded;
/* RAM / address space compat: */
bool gigabyte_align;
bool has_reserved_memory;
bool enforce_aligned_dimm;
bool broken_reserved_end;
/* generate legacy CPU hotplug AML */
bool legacy_cpu_hotplug;
/* use DMA capable linuxboot option rom */
bool linuxboot_dma_enabled;
/* use PVH to load kernels that support this feature */
bool pvh_enabled;
} PCMachineClass;
#define TYPE_PC_MACHINE "generic-pc-machine"
#define PC_MACHINE(obj) \
OBJECT_CHECK(PCMachineState, (obj), TYPE_PC_MACHINE)
#define PC_MACHINE_GET_CLASS(obj) \
OBJECT_GET_CLASS(PCMachineClass, (obj), TYPE_PC_MACHINE)
#define PC_MACHINE_CLASS(klass) \
OBJECT_CLASS_CHECK(PCMachineClass, (klass), TYPE_PC_MACHINE)
/* i8259.c */
extern DeviceState *isa_pic;
qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq);
qemu_irq *kvm_i8259_init(ISABus *bus);
int pic_read_irq(DeviceState *d);
int pic_get_output(DeviceState *d);
/* ioapic.c */
/* Global System Interrupts */
#define GSI_NUM_PINS IOAPIC_NUM_PINS
typedef struct GSIState {
qemu_irq i8259_irq[ISA_NUM_IRQS];
qemu_irq ioapic_irq[IOAPIC_NUM_PINS];
} GSIState;
void gsi_handler(void *opaque, int n, int level);
GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled);
/* vmport.c */
#define TYPE_VMPORT "vmport"
typedef uint32_t (VMPortReadFunc)(void *opaque, uint32_t address);
static inline void vmport_init(ISABus *bus)
{
isa_create_simple(bus, TYPE_VMPORT);
}
void vmport_register(unsigned char command, VMPortReadFunc *func, void *opaque);
void vmmouse_get_data(uint32_t *data);
void vmmouse_set_data(const uint32_t *data);
/* pc.c */
extern int fd_bootchk;
bool pc_machine_is_smm_enabled(PCMachineState *pcms);
void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp);
void pc_smp_parse(MachineState *ms, QemuOpts *opts);
void pc_guest_info_init(PCMachineState *pcms);
#define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start"
#define PCI_HOST_PROP_PCI_HOLE_END "pci-hole-end"
#define PCI_HOST_PROP_PCI_HOLE64_START "pci-hole64-start"
#define PCI_HOST_PROP_PCI_HOLE64_END "pci-hole64-end"
#define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size"
#define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size"
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
MemoryRegion *pci_address_space);
void xen_load_linux(PCMachineState *pcms);
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
MemoryRegion **ram_memory);
uint64_t pc_pci_hole64_start(void);
qemu_irq pc_allocate_cpu_irq(void);
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
ISADevice **rtc_state,
bool create_fdctrl,
bool no_vmport,
bool has_pit,
uint32_t hpet_irqs);
void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd);
void pc_cmos_init(PCMachineState *pcms,
BusState *ide0, BusState *ide1,
ISADevice *s);
void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus);
void pc_pci_device_init(PCIBus *pci_bus);
typedef void (*cpu_set_smm_t)(int smm, void *arg);
void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs);
void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name);
ISADevice *pc_find_fdc0(void);
int cmos_get_fd_drive_type(FloppyDriveType fd0);
#define FW_CFG_IO_BASE 0x510
#define PORT92_A20_LINE "a20"
/* hpet.c */
extern int no_hpet;
/* pc_sysfw.c */
pc: Support firmware configuration with -blockdev The PC machines put firmware in ROM by default. To get it put into flash memory (required by OVMF), you have to use -drive if=pflash,unit=0,... and optionally -drive if=pflash,unit=1,... Why two -drive? This permits setting up one part of the flash memory read-only, and the other part read/write. It also makes upgrading firmware on the host easier. Below the hood, it creates two separate flash devices, because we were too lazy to improve our flash device models to support sector protection. The problem at hand is to do the same with -blockdev somehow, as one more step towards deprecating -drive. Mapping -drive if=none,... to -blockdev is a solved problem. With if=T other than if=none, -drive additionally configures a block device frontend. For non-onboard devices, that part maps to -device. Also a solved problem. For onboard devices such as PC flash memory, we have an unsolved problem. This is actually an instance of a wider problem: our general device configuration interface doesn't cover onboard devices. Instead, we have a zoo of ad hoc interfaces that are much more limited. One of them is -drive, which we'd rather deprecate, but can't until we have suitable replacements for all its uses. Sadly, I can't attack the wider problem today. So back to the narrow problem. My first idea was to reduce it to its solved buddy by using pluggable instead of onboard devices for the flash memory. Workable, but it requires some extra smarts in firmware descriptors and libvirt. Paolo had an idea that is simpler for libvirt: keep the devices onboard, and add machine properties for their block backends. The implementation is less than straightforward, I'm afraid. First, block backend properties are *qdev* properties. Machines can't have those, as they're not devices. I could duplicate these qdev properties as QOM properties, but I hate that. More seriously, the properties do not belong to the machine, they belong to the onboard flash devices. Adding them to the machine would then require bad magic to somehow transfer them to the flash devices. Fortunately, QOM provides the means to handle exactly this case: add alias properties to the machine that forward to the onboard devices' properties. Properties need to be created in .instance_init() methods. For PC machines, that's pc_machine_initfn(). To make alias properties work, we need to create the onboard flash devices there, too. Requires several bug fixes, in the previous commits. We also have to realize the devices. More on that below. If the user sets pflash0, firmware resides in flash memory. pc_system_firmware_init() maps and realizes the flash devices. Else, firmware resides in ROM. The onboard flash devices aren't used then. pc_system_firmware_init() destroys them unrealized, along with the alias properties. The existing code to pick up drives defined with -drive if=pflash is replaced by code to desugar into the machine properties. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Acked-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Message-Id: <87ftrtux81.fsf@dusky.pond.sub.org>
2019-03-11 20:39:26 +03:00
void pc_system_flash_create(PCMachineState *pcms);
void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
/* acpi-build.c */
void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
const CPUArchIdList *apic_ids, GArray *entry);
extern GlobalProperty pc_compat_4_2[];
extern const size_t pc_compat_4_2_len;
extern GlobalProperty pc_compat_4_1[];
extern const size_t pc_compat_4_1_len;
extern GlobalProperty pc_compat_4_0[];
extern const size_t pc_compat_4_0_len;
extern GlobalProperty pc_compat_3_1[];
extern const size_t pc_compat_3_1_len;
extern GlobalProperty pc_compat_3_0[];
extern const size_t pc_compat_3_0_len;
extern GlobalProperty pc_compat_2_12[];
extern const size_t pc_compat_2_12_len;
extern GlobalProperty pc_compat_2_11[];
extern const size_t pc_compat_2_11_len;
extern GlobalProperty pc_compat_2_10[];
extern const size_t pc_compat_2_10_len;
extern GlobalProperty pc_compat_2_9[];
extern const size_t pc_compat_2_9_len;
extern GlobalProperty pc_compat_2_8[];
extern const size_t pc_compat_2_8_len;
extern GlobalProperty pc_compat_2_7[];
extern const size_t pc_compat_2_7_len;
target-i386: present virtual L3 cache info for vcpus Some software algorithms are based on the hardware's cache info, for example, for x86 linux kernel, when cpu1 want to wakeup a task on cpu2, cpu1 will trigger a resched IPI and told cpu2 to do the wakeup if they don't share low level cache. Oppositely, cpu1 will access cpu2's runqueue directly if they share llc. The relevant linux-kernel code as bellow: static void ttwu_queue(struct task_struct *p, int cpu) { struct rq *rq = cpu_rq(cpu); ...... if (... && !cpus_share_cache(smp_processor_id(), cpu)) { ...... ttwu_queue_remote(p, cpu); /* will trigger RES IPI */ return; } ...... ttwu_do_activate(rq, p, 0); /* access target's rq directly */ ...... } In real hardware, the cpus on the same socket share L3 cache, so one won't trigger a resched IPIs when wakeup a task on others. But QEMU doesn't present a virtual L3 cache info for VM, then the linux guest will trigger lots of RES IPIs under some workloads even if the virtual cpus belongs to the same virtual socket. For KVM, there will be lots of vmexit due to guest send IPIs. The workload is a SAP HANA's testsuite, we run it one round(about 40 minuates) and observe the (Suse11sp3)Guest's amounts of RES IPIs which triggering during the period: No-L3 With-L3(applied this patch) cpu0: 363890 44582 cpu1: 373405 43109 cpu2: 340783 43797 cpu3: 333854 43409 cpu4: 327170 40038 cpu5: 325491 39922 cpu6: 319129 42391 cpu7: 306480 41035 cpu8: 161139 32188 cpu9: 164649 31024 cpu10: 149823 30398 cpu11: 149823 32455 cpu12: 164830 35143 cpu13: 172269 35805 cpu14: 179979 33898 cpu15: 194505 32754 avg: 268963.6 40129.8 The VM's topology is "1*socket 8*cores 2*threads". After present virtual L3 cache info for VM, the amounts of RES IPIs in guest reduce 85%. For KVM, vcpus send IPIs will cause vmexit which is expensive, so it can cause severe performance degradation. We had tested the overall system performance if vcpus actually run on sparate physical socket. With L3 cache, the performance improves 7.2%~33.1%(avg:15.7%). Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2016-09-07 08:21:13 +03:00
extern GlobalProperty pc_compat_2_6[];
extern const size_t pc_compat_2_6_len;
extern GlobalProperty pc_compat_2_5[];
extern const size_t pc_compat_2_5_len;
extern GlobalProperty pc_compat_2_4[];
extern const size_t pc_compat_2_4_len;
extern GlobalProperty pc_compat_2_3[];
extern const size_t pc_compat_2_3_len;
extern GlobalProperty pc_compat_2_2[];
extern const size_t pc_compat_2_2_len;
extern GlobalProperty pc_compat_2_1[];
extern const size_t pc_compat_2_1_len;
extern GlobalProperty pc_compat_2_0[];
extern const size_t pc_compat_2_0_len;
extern GlobalProperty pc_compat_1_7[];
extern const size_t pc_compat_1_7_len;
extern GlobalProperty pc_compat_1_6[];
extern const size_t pc_compat_1_6_len;
extern GlobalProperty pc_compat_1_5[];
extern const size_t pc_compat_1_5_len;
extern GlobalProperty pc_compat_1_4[];
extern const size_t pc_compat_1_4_len;
/* Helper for setting model-id for CPU models that changed model-id
* depending on QEMU versions up to QEMU 2.4.
*/
#define PC_CPU_MODEL_IDS(v) \
{ "qemu32-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
{ "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
{ "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
#define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \
static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
{ \
MachineClass *mc = MACHINE_CLASS(oc); \
optsfn(mc); \
mc->init = initfn; \
} \
static const TypeInfo pc_machine_type_##suffix = { \
.name = namestr TYPE_MACHINE_SUFFIX, \
.parent = TYPE_PC_MACHINE, \
.class_init = pc_machine_##suffix##_class_init, \
}; \
static void pc_machine_init_##suffix(void) \
{ \
type_register(&pc_machine_type_##suffix); \
} \
type_init(pc_machine_init_##suffix)
extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id);
#endif