2007-11-17 20:14:51 +03:00
|
|
|
/* Declarations for use by board files for creating devices. */
|
|
|
|
|
|
|
|
#ifndef HW_BOARDS_H
|
|
|
|
#define HW_BOARDS_H
|
|
|
|
|
2019-08-12 08:23:46 +03:00
|
|
|
#include "exec/memory.h"
|
2020-02-19 19:08:36 +03:00
|
|
|
#include "sysemu/hostmem.h"
|
2012-12-17 21:20:04 +04:00
|
|
|
#include "sysemu/blockdev.h"
|
2019-06-19 23:10:41 +03:00
|
|
|
#include "qapi/qapi-types-machine.h"
|
2019-05-23 17:35:07 +03:00
|
|
|
#include "qemu/module.h"
|
2014-03-05 21:30:45 +04:00
|
|
|
#include "qom/object.h"
|
2019-07-09 18:20:52 +03:00
|
|
|
#include "hw/core/cpu.h"
|
2009-07-15 15:48:21 +04:00
|
|
|
|
2014-05-14 13:43:15 +04:00
|
|
|
#define TYPE_MACHINE_SUFFIX "-machine"
|
2015-08-21 00:54:27 +03:00
|
|
|
|
|
|
|
/* Machine class name that needs to be used for class-name-based machine
|
|
|
|
* type lookup to work.
|
|
|
|
*/
|
|
|
|
#define MACHINE_TYPE_NAME(machinename) (machinename TYPE_MACHINE_SUFFIX)
|
|
|
|
|
2014-03-05 21:30:45 +04:00
|
|
|
#define TYPE_MACHINE "machine"
|
2014-03-18 19:26:35 +04:00
|
|
|
#undef MACHINE /* BSD defines it and QEMU does not use it */
|
2020-09-16 21:25:18 +03:00
|
|
|
OBJECT_DECLARE_TYPE(MachineState, MachineClass, MACHINE)
|
2014-03-05 21:30:45 +04:00
|
|
|
|
2014-03-05 21:30:47 +04:00
|
|
|
extern MachineState *current_machine;
|
|
|
|
|
2023-11-16 19:33:12 +03:00
|
|
|
/**
|
|
|
|
* machine_class_default_cpu_type: Return the machine default CPU type.
|
|
|
|
* @mc: Machine class
|
|
|
|
*/
|
|
|
|
const char *machine_class_default_cpu_type(MachineClass *mc);
|
|
|
|
|
2022-04-25 11:21:50 +03:00
|
|
|
void machine_add_audiodev_property(MachineClass *mc);
|
2022-04-14 19:52:59 +03:00
|
|
|
void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp);
|
2015-01-06 16:29:13 +03:00
|
|
|
bool machine_usb(MachineState *machine);
|
2015-02-04 18:43:53 +03:00
|
|
|
int machine_phandle_start(MachineState *machine);
|
2015-02-04 18:43:54 +03:00
|
|
|
bool machine_dump_guest_core(MachineState *machine);
|
2015-02-04 18:43:55 +03:00
|
|
|
bool machine_mem_merge(MachineState *machine);
|
2024-03-20 11:39:03 +03:00
|
|
|
bool machine_require_guest_memfd(MachineState *machine);
|
2017-02-09 14:08:38 +03:00
|
|
|
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
|
2017-05-10 14:29:50 +03:00
|
|
|
void machine_set_cpu_numa_node(MachineState *machine,
|
|
|
|
const CpuInstanceProperties *props,
|
|
|
|
Error **errp);
|
2021-11-11 12:21:23 +03:00
|
|
|
void machine_parse_smp_config(MachineState *ms,
|
|
|
|
const SMPConfiguration *config, Error **errp);
|
2023-06-28 16:54:34 +03:00
|
|
|
unsigned int machine_topo_get_cores_per_socket(const MachineState *ms);
|
|
|
|
unsigned int machine_topo_get_threads_per_socket(const MachineState *ms);
|
2023-06-23 15:45:45 +03:00
|
|
|
void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size);
|
2015-01-06 16:29:13 +03:00
|
|
|
|
2021-03-25 18:33:07 +03:00
|
|
|
/**
|
|
|
|
* machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices
|
|
|
|
* @mc: Machine class
|
|
|
|
* @type: type to allow (should be a subtype of TYPE_SYS_BUS_DEVICE)
|
|
|
|
*
|
|
|
|
* Add the QOM type @type to the list of devices which are subtypes
|
|
|
|
* of TYPE_SYS_BUS_DEVICE but which are still permitted to be dynamically
|
|
|
|
* created (eg by the user on the command line with -device).
|
|
|
|
* By default if the user tries to create any devices on the command line
|
|
|
|
* that are subtypes of TYPE_SYS_BUS_DEVICE they will get an error message;
|
|
|
|
* for the special cases which are permitted for this machine model, the
|
|
|
|
* machine model class init code must call this function to add them
|
|
|
|
* to the list of specifically permitted devices.
|
|
|
|
*/
|
2017-11-25 18:16:05 +03:00
|
|
|
void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type);
|
2021-03-25 18:33:07 +03:00
|
|
|
|
2021-10-29 17:22:56 +03:00
|
|
|
/**
|
|
|
|
* device_type_is_dynamic_sysbus: Check if type is an allowed sysbus device
|
|
|
|
* type for the machine class.
|
|
|
|
* @mc: Machine class
|
|
|
|
* @type: type to check (should be a subtype of TYPE_SYS_BUS_DEVICE)
|
|
|
|
*
|
|
|
|
* Returns: true if @type is a type in the machine's list of
|
|
|
|
* dynamically pluggable sysbus devices; otherwise false.
|
|
|
|
*
|
|
|
|
* Check if the QOM type @type is in the list of allowed sysbus device
|
|
|
|
* types (see machine_class_allow_dynamic_sysbus_dev()).
|
|
|
|
* Note that if @type has a parent type in the list, it is allowed too.
|
|
|
|
*/
|
|
|
|
bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type);
|
|
|
|
|
2021-03-25 18:33:08 +03:00
|
|
|
/**
|
|
|
|
* device_is_dynamic_sysbus: test whether device is a dynamic sysbus device
|
|
|
|
* @mc: Machine class
|
|
|
|
* @dev: device to check
|
|
|
|
*
|
|
|
|
* Returns: true if @dev is a sysbus device on the machine's list
|
|
|
|
* of dynamically pluggable sysbus devices; otherwise false.
|
|
|
|
*
|
|
|
|
* This function checks whether @dev is a valid dynamic sysbus device,
|
|
|
|
* by first confirming that it is a sysbus device and then checking it
|
|
|
|
* against the list of permitted dynamic sysbus devices which has been
|
|
|
|
* set up by the machine using machine_class_allow_dynamic_sysbus_dev().
|
|
|
|
*
|
|
|
|
* It is valid to call this with something that is not a subclass of
|
|
|
|
* TYPE_SYS_BUS_DEVICE; the function will return false in this case.
|
|
|
|
* This allows hotplug callback functions to be written as:
|
|
|
|
* if (device_is_dynamic_sysbus(mc, dev)) {
|
|
|
|
* handle dynamic sysbus case;
|
|
|
|
* } else if (some other kind of hotplug) {
|
|
|
|
* handle that;
|
|
|
|
* }
|
|
|
|
*/
|
|
|
|
bool device_is_dynamic_sysbus(MachineClass *mc, DeviceState *dev);
|
|
|
|
|
2020-02-19 19:08:38 +03:00
|
|
|
/*
|
|
|
|
* Checks that backend isn't used, preps it for exclusive usage and
|
|
|
|
* returns migratable MemoryRegion provided by backend.
|
|
|
|
*/
|
|
|
|
MemoryRegion *machine_consume_memdev(MachineState *machine,
|
|
|
|
HostMemoryBackend *backend);
|
2017-11-25 18:16:05 +03:00
|
|
|
|
2016-03-03 17:28:56 +03:00
|
|
|
/**
|
|
|
|
* CPUArchId:
|
|
|
|
* @arch_id - architecture-dependent CPU ID of present or possible CPU
|
|
|
|
* @cpu - pointer to corresponding CPU object if it's present or NULL otherwise
|
2018-01-10 18:22:50 +03:00
|
|
|
* @type - QOM class name of possible @cpu object
|
2017-02-09 14:08:34 +03:00
|
|
|
* @props - CPU object properties, initialized by board
|
2017-02-09 14:08:38 +03:00
|
|
|
* @vcpus_count - number of threads provided by @cpu object
|
2016-03-03 17:28:56 +03:00
|
|
|
*/
|
2019-08-12 08:23:53 +03:00
|
|
|
typedef struct CPUArchId {
|
2016-03-03 17:28:56 +03:00
|
|
|
uint64_t arch_id;
|
2017-02-09 14:08:38 +03:00
|
|
|
int64_t vcpus_count;
|
2017-02-09 14:08:34 +03:00
|
|
|
CpuInstanceProperties props;
|
2024-01-29 19:44:44 +03:00
|
|
|
CPUState *cpu;
|
2018-01-10 18:22:50 +03:00
|
|
|
const char *type;
|
2016-03-03 17:28:56 +03:00
|
|
|
} CPUArchId;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* CPUArchIdList:
|
|
|
|
* @len - number of @CPUArchId items in @cpus array
|
|
|
|
* @cpus - array of present or possible CPUs for current machine configuration
|
|
|
|
*/
|
|
|
|
typedef struct {
|
|
|
|
int len;
|
2020-03-04 18:38:16 +03:00
|
|
|
CPUArchId cpus[];
|
2016-03-03 17:28:56 +03:00
|
|
|
} CPUArchIdList;
|
|
|
|
|
2021-09-29 05:58:12 +03:00
|
|
|
/**
|
|
|
|
* SMPCompatProps:
|
2021-09-29 05:58:14 +03:00
|
|
|
* @prefer_sockets - whether sockets are preferred over cores in smp parsing
|
2021-09-29 05:58:12 +03:00
|
|
|
* @dies_supported - whether dies are supported by the machine
|
hw/core/machine: Introduce CPU cluster topology support
The new Cluster-Aware Scheduling support has landed in Linux 5.16,
which has been proved to benefit the scheduling performance (e.g.
load balance and wake_affine strategy) on both x86_64 and AArch64.
So now in Linux 5.16 we have four-level arch-neutral CPU topology
definition like below and a new scheduler level for clusters.
struct cpu_topology {
int thread_id;
int core_id;
int cluster_id;
int package_id;
int llc_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
cpumask_t cluster_sibling;
cpumask_t llc_sibling;
}
A cluster generally means a group of CPU cores which share L2 cache
or other mid-level resources, and it is the shared resources that
is used to improve scheduler's behavior. From the point of view of
the size range, it's between CPU die and CPU core. For example, on
some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node,
and 4 CPU cores in each cluster. The 4 CPU cores share a separate
L2 cache and a L3 cache tag, which brings cache affinity advantage.
In virtualization, on the Hosts which have pClusters (physical
clusters), if we can design a vCPU topology with cluster level for
guest kernel and have a dedicated vCPU pinning. A Cluster-Aware
Guest kernel can also make use of the cache affinity of CPU clusters
to gain similar scheduling performance.
This patch adds infrastructure for CPU cluster level topology
configuration and parsing, so that the user can specify cluster
parameter if their machines support it.
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
[PMD: Added '(since 7.0)' to @clusters in qapi/machine.json]
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 12:22:09 +03:00
|
|
|
* @clusters_supported - whether clusters are supported by the machine
|
2022-12-29 09:55:09 +03:00
|
|
|
* @has_clusters - whether clusters are explicitly specified in the user
|
|
|
|
* provided SMP configuration
|
2023-10-16 21:39:06 +03:00
|
|
|
* @books_supported - whether books are supported by the machine
|
|
|
|
* @drawers_supported - whether drawers are supported by the machine
|
hw/core/machine: Introduce the module as a CPU topology level
In x86, module is the topology level above core, which contains a set
of cores that share certain resources (in current products, the resource
usually includes L2 cache, as well as module scoped features and MSRs).
Though smp.clusters could also share the L2 cache resource [1], there
are following reasons that drive us to introduce the new smp.modules:
* As the CPU topology abstraction in device tree [2], cluster supports
nesting (though currently QEMU hasn't support that). In contrast,
(x86) module does not support nesting.
* Due to nesting, there is great flexibility in sharing resources
on cluster, rather than narrowing cluster down to sharing L2 (and
L3 tags) as the lowest topology level that contains cores.
* Flexible nesting of cluster allows it to correspond to any level
between the x86 package and core.
* In Linux kernel, x86's cluster only represents the L2 cache domain
but QEMU's smp.clusters is the CPU topology level. Linux kernel will
also expose module level topology information in sysfs for x86. To
avoid cluster ambiguity and keep a consistent CPU topology naming
style with the Linux kernel, we introduce module level for x86.
The module is, in existing hardware practice, the lowest layer that
contains the core, while the cluster is able to have a higher
topological scope than the module due to its nesting.
Therefore, place the module between the cluster and the core:
drawer/book/socket/die/cluster/module/core/thread
With the above topological hierarchy order, introduce module level
support in MachineState and MachineClass.
[1]: https://lore.kernel.org/qemu-devel/c3d68005-54e0-b8fe-8dc1-5989fe3c7e69@huawei.com/
[2]: https://www.kernel.org/doc/Documentation/devicetree/bindings/cpu/cpu-topology.txt
Suggested-by: Xiaoyao Li <xiaoyao.li@intel.com>
Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Message-ID: <20240424154929.1487382-2-zhao1.liu@intel.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-04-24 18:49:09 +03:00
|
|
|
* @modules_supported - whether modules are supported by the machine
|
2021-09-29 05:58:12 +03:00
|
|
|
*/
|
|
|
|
typedef struct {
|
2021-09-29 05:58:14 +03:00
|
|
|
bool prefer_sockets;
|
2021-09-29 05:58:12 +03:00
|
|
|
bool dies_supported;
|
hw/core/machine: Introduce CPU cluster topology support
The new Cluster-Aware Scheduling support has landed in Linux 5.16,
which has been proved to benefit the scheduling performance (e.g.
load balance and wake_affine strategy) on both x86_64 and AArch64.
So now in Linux 5.16 we have four-level arch-neutral CPU topology
definition like below and a new scheduler level for clusters.
struct cpu_topology {
int thread_id;
int core_id;
int cluster_id;
int package_id;
int llc_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
cpumask_t cluster_sibling;
cpumask_t llc_sibling;
}
A cluster generally means a group of CPU cores which share L2 cache
or other mid-level resources, and it is the shared resources that
is used to improve scheduler's behavior. From the point of view of
the size range, it's between CPU die and CPU core. For example, on
some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node,
and 4 CPU cores in each cluster. The 4 CPU cores share a separate
L2 cache and a L3 cache tag, which brings cache affinity advantage.
In virtualization, on the Hosts which have pClusters (physical
clusters), if we can design a vCPU topology with cluster level for
guest kernel and have a dedicated vCPU pinning. A Cluster-Aware
Guest kernel can also make use of the cache affinity of CPU clusters
to gain similar scheduling performance.
This patch adds infrastructure for CPU cluster level topology
configuration and parsing, so that the user can specify cluster
parameter if their machines support it.
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
[PMD: Added '(since 7.0)' to @clusters in qapi/machine.json]
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 12:22:09 +03:00
|
|
|
bool clusters_supported;
|
2022-12-29 09:55:09 +03:00
|
|
|
bool has_clusters;
|
2023-10-16 21:39:06 +03:00
|
|
|
bool books_supported;
|
|
|
|
bool drawers_supported;
|
hw/core/machine: Introduce the module as a CPU topology level
In x86, module is the topology level above core, which contains a set
of cores that share certain resources (in current products, the resource
usually includes L2 cache, as well as module scoped features and MSRs).
Though smp.clusters could also share the L2 cache resource [1], there
are following reasons that drive us to introduce the new smp.modules:
* As the CPU topology abstraction in device tree [2], cluster supports
nesting (though currently QEMU hasn't support that). In contrast,
(x86) module does not support nesting.
* Due to nesting, there is great flexibility in sharing resources
on cluster, rather than narrowing cluster down to sharing L2 (and
L3 tags) as the lowest topology level that contains cores.
* Flexible nesting of cluster allows it to correspond to any level
between the x86 package and core.
* In Linux kernel, x86's cluster only represents the L2 cache domain
but QEMU's smp.clusters is the CPU topology level. Linux kernel will
also expose module level topology information in sysfs for x86. To
avoid cluster ambiguity and keep a consistent CPU topology naming
style with the Linux kernel, we introduce module level for x86.
The module is, in existing hardware practice, the lowest layer that
contains the core, while the cluster is able to have a higher
topological scope than the module due to its nesting.
Therefore, place the module between the cluster and the core:
drawer/book/socket/die/cluster/module/core/thread
With the above topological hierarchy order, introduce module level
support in MachineState and MachineClass.
[1]: https://lore.kernel.org/qemu-devel/c3d68005-54e0-b8fe-8dc1-5989fe3c7e69@huawei.com/
[2]: https://www.kernel.org/doc/Documentation/devicetree/bindings/cpu/cpu-topology.txt
Suggested-by: Xiaoyao Li <xiaoyao.li@intel.com>
Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Message-ID: <20240424154929.1487382-2-zhao1.liu@intel.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-04-24 18:49:09 +03:00
|
|
|
bool modules_supported;
|
2021-09-29 05:58:12 +03:00
|
|
|
} SMPCompatProps;
|
|
|
|
|
2014-03-05 21:30:45 +04:00
|
|
|
/**
|
|
|
|
* MachineClass:
|
2018-06-25 12:05:12 +03:00
|
|
|
* @deprecation_reason: If set, the machine is marked as deprecated. The
|
|
|
|
* string should provide some clear information about what to use instead.
|
2017-11-13 16:55:27 +03:00
|
|
|
* @max_cpus: maximum number of CPUs supported. Default: 1
|
|
|
|
* @min_cpus: minimum number of CPUs supported. Default: 1
|
|
|
|
* @default_cpus: number of CPUs instantiated if none are specified. Default: 1
|
2020-02-07 19:19:47 +03:00
|
|
|
* @is_default:
|
|
|
|
* If true QEMU will use this machine by default if no '-M' option is given.
|
2014-06-02 17:25:03 +04:00
|
|
|
* @get_hotplug_handler: this function is called during bus-less
|
|
|
|
* device hotplug. If defined it returns pointer to an instance
|
|
|
|
* of HotplugHandler object, which handles hotplug operation
|
|
|
|
* for a given @dev. It may return NULL if @dev doesn't require
|
|
|
|
* any actions to be performed by hotplug handler.
|
2017-05-10 14:29:45 +03:00
|
|
|
* @cpu_index_to_instance_props:
|
|
|
|
* used to provide @cpu_index to socket/core/thread number mapping, allowing
|
2023-07-14 14:33:02 +03:00
|
|
|
* legacy code to perform mapping from cpu_index to topology properties
|
2017-05-10 14:29:45 +03:00
|
|
|
* Returns: tuple of socket/core/thread ids given cpu_index belongs to.
|
2015-03-19 20:09:21 +03:00
|
|
|
* used to provide @cpu_index to socket number mapping, allowing
|
|
|
|
* a machine to group CPU threads belonging to the same socket/package
|
|
|
|
* Returns: socket number given cpu_index belongs to.
|
2015-11-12 20:29:54 +03:00
|
|
|
* @hw_version:
|
|
|
|
* Value of QEMU_VERSION when the machine was added to QEMU.
|
|
|
|
* Set only by old machines because they need to keep
|
|
|
|
* compatibility on code that exposed QEMU_VERSION to guests in
|
|
|
|
* the past (and now use qemu_hw_version()).
|
2016-03-03 17:28:56 +03:00
|
|
|
* @possible_cpu_arch_ids:
|
|
|
|
* Returns an array of @CPUArchId architecture-dependent CPU IDs
|
|
|
|
* which includes CPU IDs for present and possible to hotplug CPUs.
|
|
|
|
* Caller is responsible for freeing returned list.
|
2017-06-01 13:53:28 +03:00
|
|
|
* @get_default_cpu_node_id:
|
|
|
|
* returns default board specific node_id value for CPU slot specified by
|
|
|
|
* index @idx in @ms->possible_cpus[]
|
2017-02-10 13:20:57 +03:00
|
|
|
* @has_hotpluggable_cpus:
|
|
|
|
* If true, board supports CPU creation with -device/device_add.
|
2017-09-13 19:04:55 +03:00
|
|
|
* @default_cpu_type:
|
|
|
|
* specifies default CPU_TYPE, which will be used for parsing target
|
|
|
|
* specific features and for creating CPUs if CPU name wasn't provided
|
|
|
|
* explicitly at CLI
|
2016-10-24 18:26:49 +03:00
|
|
|
* @minimum_page_bits:
|
|
|
|
* If non-zero, the board promises never to create a CPU with a page size
|
|
|
|
* smaller than this, so QEMU can use a more efficient larger page
|
|
|
|
* size than the target architecture's minimum. (Attempting to create
|
|
|
|
* such a CPU will fail.) Note that changing this is a migration
|
|
|
|
* compatibility break for the machine.
|
2017-09-07 15:54:54 +03:00
|
|
|
* @ignore_memory_transaction_failures:
|
|
|
|
* If this flag is true then the CPU will ignore memory transaction
|
|
|
|
* failures which should cause the CPU to take an exception due to an
|
|
|
|
* access to an unassigned physical address; the transaction will instead
|
|
|
|
* return zero (for a read) or be ignored (for a write). This should be
|
|
|
|
* set only by legacy board models which rely on the old RAZ/WI behaviour
|
|
|
|
* for handling devices that QEMU does not yet model. New board models
|
|
|
|
* should instead use "unimplemented-device" for all memory ranges where
|
|
|
|
* the guest will attempt to probe for a device that QEMU doesn't
|
|
|
|
* implement and a stub device is required.
|
2019-03-04 13:13:33 +03:00
|
|
|
* @kvm_type:
|
|
|
|
* Return the type of KVM corresponding to the kvm-type string option or
|
|
|
|
* computed based on other criteria such as the host kernel capabilities.
|
2021-03-10 16:52:17 +03:00
|
|
|
* kvm-type may be NULL if it is not needed.
|
2019-06-10 16:10:07 +03:00
|
|
|
* @numa_mem_supported:
|
|
|
|
* true if '--numa node.mem' option is supported and false otherwise
|
2019-09-16 11:07:16 +03:00
|
|
|
* @hotplug_allowed:
|
|
|
|
* If the hook is provided, then it'll be called for each device
|
|
|
|
* hotplug to check whether the device hotplug is allowed. Return
|
|
|
|
* true to grant allowance or false to reject the hotplug. When
|
|
|
|
* false is returned, an error must be set to show the reason of
|
|
|
|
* the rejection. If the hook is not provided, all hotplug will be
|
|
|
|
* allowed.
|
2020-02-19 19:08:37 +03:00
|
|
|
* @default_ram_id:
|
2023-07-14 14:33:02 +03:00
|
|
|
* Specifies initial RAM MemoryRegion name to be used for default backend
|
2020-02-19 19:08:37 +03:00
|
|
|
* creation if user explicitly hasn't specified backend with "memory-backend"
|
|
|
|
* property.
|
2023-07-14 14:33:02 +03:00
|
|
|
* It also will be used as a way to opt into "-m" option support.
|
2020-02-19 19:08:37 +03:00
|
|
|
* If it's not set by board, '-m' will be ignored and generic code will
|
|
|
|
* not create default RAM MemoryRegion.
|
2020-04-01 15:37:54 +03:00
|
|
|
* @fixup_ram_size:
|
|
|
|
* Amends user provided ram size (with -m option) using machine
|
|
|
|
* specific algorithm. To be used by old machine types for compat
|
|
|
|
* purposes only.
|
|
|
|
* Applies only to default memory backend, i.e., explicit memory backend
|
|
|
|
* wasn't used.
|
smbios: make memory device size configurable per Machine
Currently QEMU describes initial[1] RAM* in SMBIOS as a series of
virtual DIMMs (capped at 16Gb max) using type 17 structure entries.
Which is fine for the most cases. However when starting guest
with terabytes of RAM this leads to too many memory device
structures, which eventually upsets linux kernel as it reserves
only 64K for these entries and when that border is crossed out
it runs out of reserved memory.
Instead of partitioning initial RAM on 16Gb DIMMs, use maximum
possible chunk size that SMBIOS spec allows[2]. Which lets
encode RAM in lower 31 bits of 32bit field (which amounts upto
2047Tb per DIMM).
As result initial RAM will generate only one type 17 structure
until host/guest reach ability to use more RAM in the future.
Compat changes:
We can't unconditionally change chunk size as it will break
QEMU<->guest ABI (and migration). Thus introduce a new machine
class field that would let older versioned machines to use
legacy 16Gb chunks, while new(er) machine type[s] use maximum
possible chunk size.
PS:
While it might seem to be risky to rise max entry size this large
(much beyond of what current physical RAM modules support),
I'd not expect it causing much issues, modulo uncovering bugs
in software running within guest. And those should be fixed
on guest side to handle SMBIOS spec properly, especially if
guest is expected to support so huge RAM configs.
In worst case, QEMU can reduce chunk size later if we would
care enough about introducing a workaround for some 'unfixable'
guest OS, either by fixing up the next machine type or
giving users a CLI option to customize it.
1) Initial RAM - is RAM configured with help '-m SIZE' CLI option/
implicitly defined by machine. It doesn't include memory
configured with help of '-device' option[s] (pcdimm,nvdimm,...)
2) SMBIOS 3.1.0 7.18.5 Memory Device — Extended Size
PS:
* tested on 8Tb host with RHEL6 guest, which seems to parse
type 17 SMBIOS table entries correctly (according to 'dmidecode').
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20240715122417.4059293-1-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2024-07-15 15:24:17 +03:00
|
|
|
* @smbios_memory_device_size:
|
|
|
|
* Default size of memory device,
|
|
|
|
* SMBIOS 3.1.0 "7.18 Memory Device (Type 17)"
|
2014-03-05 21:30:45 +04:00
|
|
|
*/
|
|
|
|
struct MachineClass {
|
|
|
|
/*< private >*/
|
|
|
|
ObjectClass parent_class;
|
|
|
|
/*< public >*/
|
|
|
|
|
well-defined listing order for machine types
Commit 261747f1 ("vl: Use MachineClass instead of global QEMUMachine
list") broke the ordering of the machine types in the user-visible output
of
qemu-system-XXXX -M \?
This occurred because registration was rebased from a manually maintained
linked list to GLib hash tables:
qemu_register_machine()
type_register()
type_register_internal()
type_table_add()
g_hash_table_insert()
and because the listing was rebased accordingly, from the traversal of the
list to the traversal of the hash table (rendered as an ad-hoc list):
machine_parse()
object_class_get_list(TYPE_MACHINE)
object_class_foreach()
g_hash_table_foreach()
The current order is a "random" one, for practical purposes, which is
annoying for users.
Introduce new members QEMUMachine.family and MachineClass.family, allowing
machine types to be "clustered". Introduce a comparator function that
establishes a total ordering between machine types, ordering machine types
in the same family next to each other. In machine_parse(), list the
supported machine types sorted with the comparator function.
The comparator function:
- sorts whole families before standalone machine types,
- sorts whole families between each other in alphabetically increasing
order,
- sorts machine types inside the same family in alphabetically decreasing
order,
- sorts standalone machine types between each other in alphabetically
increasing order.
After this patch, all machine types are considered standalone, and
accordingly, the output is alphabetically ascending. This will be refined
in the following patches.
Effects on the x86_64 output:
Before:
> Supported machines are:
> pc-0.13 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-2.0 Standard PC (i440FX + PIIX, 1996)
> pc-1.0 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-2.1 Standard PC (i440FX + PIIX, 1996)
> pc-q35-1.7 Standard PC (Q35 + ICH9, 2009)
> pc-1.1 Standard PC (i440FX + PIIX, 1996)
> pc-0.14 Standard PC (i440FX + PIIX, 1996)
> pc-q35-2.0 Standard PC (Q35 + ICH9, 2009)
> pc-i440fx-1.4 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-1.5 Standard PC (i440FX + PIIX, 1996)
> pc-0.15 Standard PC (i440FX + PIIX, 1996)
> pc-q35-1.4 Standard PC (Q35 + ICH9, 2009)
> isapc ISA-only PC
> pc Standard PC (i440FX + PIIX, 1996) (alias of pc-i440fx-2.2)
> pc-i440fx-2.2 Standard PC (i440FX + PIIX, 1996) (default)
> pc-1.2 Standard PC (i440FX + PIIX, 1996)
> pc-0.10 Standard PC (i440FX + PIIX, 1996)
> pc-0.11 Standard PC (i440FX + PIIX, 1996)
> pc-q35-2.1 Standard PC (Q35 + ICH9, 2009)
> q35 Standard PC (Q35 + ICH9, 2009) (alias of pc-q35-2.2)
> pc-q35-2.2 Standard PC (Q35 + ICH9, 2009)
> pc-i440fx-1.6 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-1.7 Standard PC (i440FX + PIIX, 1996)
> none empty machine
> pc-q35-1.5 Standard PC (Q35 + ICH9, 2009)
> pc-q35-1.6 Standard PC (Q35 + ICH9, 2009)
> pc-0.12 Standard PC (i440FX + PIIX, 1996)
> pc-1.3 Standard PC (i440FX + PIIX, 1996)
After:
> Supported machines are:
> isapc ISA-only PC
> none empty machine
> pc-0.10 Standard PC (i440FX + PIIX, 1996)
> pc-0.11 Standard PC (i440FX + PIIX, 1996)
> pc-0.12 Standard PC (i440FX + PIIX, 1996)
> pc-0.13 Standard PC (i440FX + PIIX, 1996)
> pc-0.14 Standard PC (i440FX + PIIX, 1996)
> pc-0.15 Standard PC (i440FX + PIIX, 1996)
> pc-1.0 Standard PC (i440FX + PIIX, 1996)
> pc-1.1 Standard PC (i440FX + PIIX, 1996)
> pc-1.2 Standard PC (i440FX + PIIX, 1996)
> pc-1.3 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-1.4 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-1.5 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-1.6 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-1.7 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-2.0 Standard PC (i440FX + PIIX, 1996)
> pc-i440fx-2.1 Standard PC (i440FX + PIIX, 1996)
> pc Standard PC (i440FX + PIIX, 1996) (alias of pc-i440fx-2.2)
> pc-i440fx-2.2 Standard PC (i440FX + PIIX, 1996) (default)
> pc-q35-1.4 Standard PC (Q35 + ICH9, 2009)
> pc-q35-1.5 Standard PC (Q35 + ICH9, 2009)
> pc-q35-1.6 Standard PC (Q35 + ICH9, 2009)
> pc-q35-1.7 Standard PC (Q35 + ICH9, 2009)
> pc-q35-2.0 Standard PC (Q35 + ICH9, 2009)
> pc-q35-2.1 Standard PC (Q35 + ICH9, 2009)
> q35 Standard PC (Q35 + ICH9, 2009) (alias of pc-q35-2.2)
> pc-q35-2.2 Standard PC (Q35 + ICH9, 2009)
Effects on the aarch64 output:
Before:
> Supported machines are:
> lm3s811evb Stellaris LM3S811EVB
> canon-a1100 Canon PowerShot A1100 IS
> vexpress-a15 ARM Versatile Express for Cortex-A15
> vexpress-a9 ARM Versatile Express for Cortex-A9
> xilinx-zynq-a9 Xilinx Zynq Platform Baseboard for Cortex-A9
> connex Gumstix Connex (PXA255)
> n800 Nokia N800 tablet aka. RX-34 (OMAP2420)
> lm3s6965evb Stellaris LM3S6965EVB
> versatileab ARM Versatile/AB (ARM926EJ-S)
> borzoi Borzoi PDA (PXA270)
> tosa Tosa PDA (PXA255)
> cheetah Palm Tungsten|E aka. Cheetah PDA (OMAP310)
> midway Calxeda Midway (ECX-2000)
> mainstone Mainstone II (PXA27x)
> n810 Nokia N810 tablet aka. RX-44 (OMAP2420)
> terrier Terrier PDA (PXA270)
> highbank Calxeda Highbank (ECX-1000)
> cubieboard cubietech cubieboard
> sx1-v1 Siemens SX1 (OMAP310) V1
> sx1 Siemens SX1 (OMAP310) V2
> realview-eb-mpcore ARM RealView Emulation Baseboard (ARM11MPCore)
> kzm ARM KZM Emulation Baseboard (ARM1136)
> akita Akita PDA (PXA270)
> z2 Zipit Z2 (PXA27x)
> musicpal Marvell 88w8618 / MusicPal (ARM926EJ-S)
> realview-pb-a8 ARM RealView Platform Baseboard for Cortex-A8
> versatilepb ARM Versatile/PB (ARM926EJ-S)
> realview-eb ARM RealView Emulation Baseboard (ARM926EJ-S)
> realview-pbx-a9 ARM RealView Platform Baseboard Explore for Cortex-A9
> spitz Spitz PDA (PXA270)
> none empty machine
> virt ARM Virtual Machine
> collie Collie PDA (SA-1110)
> smdkc210 Samsung SMDKC210 board (Exynos4210)
> verdex Gumstix Verdex (PXA270)
> nuri Samsung NURI board (Exynos4210)
> integratorcp ARM Integrator/CP (ARM926EJ-S)
After:
> Supported machines are:
> akita Akita PDA (PXA270)
> borzoi Borzoi PDA (PXA270)
> canon-a1100 Canon PowerShot A1100 IS
> cheetah Palm Tungsten|E aka. Cheetah PDA (OMAP310)
> collie Collie PDA (SA-1110)
> connex Gumstix Connex (PXA255)
> cubieboard cubietech cubieboard
> highbank Calxeda Highbank (ECX-1000)
> integratorcp ARM Integrator/CP (ARM926EJ-S)
> kzm ARM KZM Emulation Baseboard (ARM1136)
> lm3s6965evb Stellaris LM3S6965EVB
> lm3s811evb Stellaris LM3S811EVB
> mainstone Mainstone II (PXA27x)
> midway Calxeda Midway (ECX-2000)
> musicpal Marvell 88w8618 / MusicPal (ARM926EJ-S)
> n800 Nokia N800 tablet aka. RX-34 (OMAP2420)
> n810 Nokia N810 tablet aka. RX-44 (OMAP2420)
> none empty machine
> nuri Samsung NURI board (Exynos4210)
> realview-eb ARM RealView Emulation Baseboard (ARM926EJ-S)
> realview-eb-mpcore ARM RealView Emulation Baseboard (ARM11MPCore)
> realview-pb-a8 ARM RealView Platform Baseboard for Cortex-A8
> realview-pbx-a9 ARM RealView Platform Baseboard Explore for Cortex-A9
> smdkc210 Samsung SMDKC210 board (Exynos4210)
> spitz Spitz PDA (PXA270)
> sx1 Siemens SX1 (OMAP310) V2
> sx1-v1 Siemens SX1 (OMAP310) V1
> terrier Terrier PDA (PXA270)
> tosa Tosa PDA (PXA255)
> verdex Gumstix Verdex (PXA270)
> versatileab ARM Versatile/AB (ARM926EJ-S)
> versatilepb ARM Versatile/PB (ARM926EJ-S)
> vexpress-a15 ARM Versatile Express for Cortex-A15
> vexpress-a9 ARM Versatile Express for Cortex-A9
> virt ARM Virtual Machine
> xilinx-zynq-a9 Xilinx Zynq Platform Baseboard for Cortex-A9
> z2 Zipit Z2 (PXA27x)
RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1145042
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel.a@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
2014-09-23 00:38:35 +04:00
|
|
|
const char *family; /* NULL iff @name identifies a standalone machtype */
|
2016-07-13 13:45:50 +03:00
|
|
|
char *name;
|
2014-04-09 21:34:50 +04:00
|
|
|
const char *alias;
|
|
|
|
const char *desc;
|
2018-06-25 12:05:12 +03:00
|
|
|
const char *deprecation_reason;
|
2014-04-09 21:34:50 +04:00
|
|
|
|
2014-05-07 18:42:57 +04:00
|
|
|
void (*init)(MachineState *state);
|
2022-10-25 03:43:17 +03:00
|
|
|
void (*reset)(MachineState *state, ShutdownCause reason);
|
2019-07-22 08:32:13 +03:00
|
|
|
void (*wakeup)(MachineState *state);
|
2019-03-04 13:13:33 +03:00
|
|
|
int (*kvm_type)(MachineState *machine, const char *arg);
|
2014-04-09 21:34:50 +04:00
|
|
|
|
|
|
|
BlockInterfaceType block_default_type;
|
pc/vl: Add units-per-default-bus property
This patch adds the 'units_per_default_bus' property which
allows individual boards to declare their desired
index => (bus,unit) mapping for their default HBA, so that
boards such as Q35 can specify that its default if_ide HBA,
AHCI, only accepts one unit per bus.
This property only overrides the mapping for drives matching
the block_default_type interface.
This patch also adds this property to *all* past and present
Q35 machine types. This retroactive addition is justified
because the previous erroneous index=>(bus,unit) mappings
caused by lack of such a property were not utilized due to
lack of initialization code in the Q35 init routine.
Further, semantically, the Q35 board type has always had the
property that its default HBA, AHCI, only accepts one unit per
bus. The new code added to add devices to drives relies upon
the accuracy of this mapping. Thus, the property is applied
retroactively to reduce complexity of allowing IDE HBAs with
different units per bus.
Examples:
Prior to this patch, all IDE HBAs were assumed to use 2 units
per bus (Master, Slave). When using Q35 and AHCI, however, we
only allow one unit per bus.
-hdb foo.qcow2 would become index=1, or bus=0,unit=1.
-hdd foo.qcow2 would become index=3, or bus=1,unit=1.
-drive file=foo.qcow2,index=5 becomes bus=2,unit=1.
These are invalid for AHCI. They now become, under Q35 only:
-hdb foo.qcow2 --> index=1, bus=1, unit=0.
-hdd foo.qcow2 --> index=3, bus=3, unit=0.
-drive file=foo.qcow2,index=5 --> bus=5,unit=0.
The mapping is adjusted based on the fact that the default IF
for the Q35 machine type is IF_IDE, and units-per-default-bus
overrides the IDE mapping from its default of 2 units per bus
to just 1 unit per bus.
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 1412187569-23452-4-git-send-email-jsnow@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-10-01 22:19:26 +04:00
|
|
|
int units_per_default_bus;
|
2014-04-09 21:34:50 +04:00
|
|
|
int max_cpus;
|
2017-11-13 16:55:27 +03:00
|
|
|
int min_cpus;
|
|
|
|
int default_cpus;
|
2014-04-09 21:34:50 +04:00
|
|
|
unsigned int no_serial:1,
|
|
|
|
no_parallel:1,
|
|
|
|
no_floppy:1,
|
|
|
|
no_cdrom:1,
|
2014-07-01 18:14:41 +04:00
|
|
|
no_sdcard:1,
|
2016-04-07 17:12:58 +03:00
|
|
|
pci_allow_0_address:1,
|
|
|
|
legacy_fw_cfg_order:1;
|
2020-02-07 19:19:47 +03:00
|
|
|
bool is_default;
|
2014-04-09 21:34:50 +04:00
|
|
|
const char *default_machine_opts;
|
|
|
|
const char *default_boot_order;
|
2014-10-28 12:09:11 +03:00
|
|
|
const char *default_display;
|
2023-05-10 21:46:21 +03:00
|
|
|
const char *default_nic;
|
2018-12-01 22:44:11 +03:00
|
|
|
GPtrArray *compat_props;
|
2014-04-09 21:34:50 +04:00
|
|
|
const char *hw_version;
|
2015-05-07 08:33:57 +03:00
|
|
|
ram_addr_t default_ram_size;
|
2017-09-13 19:04:55 +03:00
|
|
|
const char *default_cpu_type;
|
2018-12-20 08:40:35 +03:00
|
|
|
bool default_kernel_irqchip_split;
|
2015-12-02 01:58:08 +03:00
|
|
|
bool option_rom_has_mr;
|
|
|
|
bool rom_file_has_mr;
|
2016-10-24 18:26:49 +03:00
|
|
|
int minimum_page_bits;
|
2017-02-10 13:20:57 +03:00
|
|
|
bool has_hotpluggable_cpus;
|
2017-09-07 15:54:54 +03:00
|
|
|
bool ignore_memory_transaction_failures;
|
2017-03-21 13:25:42 +03:00
|
|
|
int numa_mem_align_shift;
|
2023-11-17 10:17:01 +03:00
|
|
|
const char * const *valid_cpu_types;
|
2017-11-25 18:16:05 +03:00
|
|
|
strList *allowed_dynamic_sysbus_devices;
|
2017-11-14 05:34:01 +03:00
|
|
|
bool auto_enable_numa_with_memhp;
|
2020-06-26 10:22:48 +03:00
|
|
|
bool auto_enable_numa_with_memdev;
|
2018-08-10 15:40:27 +03:00
|
|
|
bool ignore_boot_device_suffixes;
|
2018-11-14 23:41:01 +03:00
|
|
|
bool smbus_no_migration_support;
|
2019-03-08 21:20:53 +03:00
|
|
|
bool nvdimm_supported;
|
2019-06-10 16:10:07 +03:00
|
|
|
bool numa_mem_supported;
|
2019-09-05 11:32:38 +03:00
|
|
|
bool auto_enable_numa;
|
numa: Validate cluster and NUMA node boundary if required
For some architectures like ARM64, multiple CPUs in one cluster can be
associated with different NUMA nodes, which is irregular configuration
because we shouldn't have this in baremetal environment. The irregular
configuration causes Linux guest to misbehave, as the following warning
messages indicate.
-smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \
-numa node,nodeid=0,cpus=0-1,memdev=ram0 \
-numa node,nodeid=1,cpus=2-3,memdev=ram1 \
-numa node,nodeid=2,cpus=4-5,memdev=ram2 \
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1
pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : build_sched_domains+0x284/0x910
lr : build_sched_domains+0x184/0x910
sp : ffff80000804bd50
x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000
x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840
x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508
x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014
x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e
x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0
x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041
x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001
x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002
x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001
Call trace:
build_sched_domains+0x284/0x910
sched_init_domains+0xac/0xe0
sched_init_smp+0x48/0xc8
kernel_init_freeable+0x140/0x1ac
kernel_init+0x28/0x140
ret_from_fork+0x10/0x20
Improve the situation to warn when multiple CPUs in one cluster have
been associated with different NUMA nodes. However, one NUMA node is
allowed to be associated with different clusters.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20230509002739.18388-2-gshan@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2023-05-09 03:27:37 +03:00
|
|
|
bool cpu_cluster_has_numa_boundary;
|
2021-09-29 05:58:12 +03:00
|
|
|
SMPCompatProps smp_props;
|
2020-02-19 19:08:37 +03:00
|
|
|
const char *default_ram_id;
|
2014-06-02 17:25:03 +04:00
|
|
|
|
|
|
|
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
|
|
|
|
DeviceState *dev);
|
2019-09-16 11:07:16 +03:00
|
|
|
bool (*hotplug_allowed)(MachineState *state, DeviceState *dev,
|
|
|
|
Error **errp);
|
2017-05-10 14:29:45 +03:00
|
|
|
CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
|
|
|
|
unsigned cpu_index);
|
2017-01-18 20:13:20 +03:00
|
|
|
const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
|
2017-06-01 13:53:28 +03:00
|
|
|
int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx);
|
2020-04-01 15:37:54 +03:00
|
|
|
ram_addr_t (*fixup_ram_size)(ram_addr_t size);
|
smbios: make memory device size configurable per Machine
Currently QEMU describes initial[1] RAM* in SMBIOS as a series of
virtual DIMMs (capped at 16Gb max) using type 17 structure entries.
Which is fine for most cases. However, when starting a guest
with terabytes of RAM this leads to too many memory device
structures, which eventually upsets linux kernel as it reserves
only 64K for these entries and when that border is crossed out
it runs out of reserved memory.
Instead of partitioning initial RAM on 16Gb DIMMs, use maximum
possible chunk size that SMBIOS spec allows[2]. Which lets
encode RAM in the lower 31 bits of a 32-bit field (which amounts to up to
2047Tb per DIMM).
As result initial RAM will generate only one type 17 structure
until host/guest reach ability to use more RAM in the future.
Compat changes:
We can't unconditionally change chunk size as it will break
QEMU<->guest ABI (and migration). Thus introduce a new machine
class field that would let older versioned machines to use
legacy 16Gb chunks, while new(er) machine type[s] use maximum
possible chunk size.
PS:
While it might seem risky to raise the max entry size this much
(much beyond of what current physical RAM modules support),
I'd not expect it causing much issues, modulo uncovering bugs
in software running within guest. And those should be fixed
on guest side to handle SMBIOS spec properly, especially if
guest is expected to support so huge RAM configs.
In worst case, QEMU can reduce chunk size later if we would
care enough about introducing a workaround for some 'unfixable'
guest OS, either by fixing up the next machine type or
giving users a CLI option to customize it.
1) Initial RAM - is RAM configured with help '-m SIZE' CLI option/
implicitly defined by machine. It doesn't include memory
configured with help of '-device' option[s] (pcdimm,nvdimm,...)
2) SMBIOS 3.1.0 7.18.5 Memory Device — Extended Size
PS:
* tested on 8Tb host with RHEL6 guest, which seems to parse
type 17 SMBIOS table entries correctly (according to 'dmidecode').
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20240715122417.4059293-1-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2024-07-15 15:24:17 +03:00
|
|
|
uint64_t smbios_memory_device_size;
|
2014-03-05 21:30:45 +04:00
|
|
|
};
|
|
|
|
|
2018-04-23 19:51:17 +03:00
|
|
|
/**
|
2018-04-23 19:51:23 +03:00
|
|
|
* DeviceMemoryState:
|
2018-04-23 19:51:17 +03:00
|
|
|
* @base: address in guest physical address space where the memory
|
|
|
|
* address space for memory devices starts
|
2023-09-26 21:57:28 +03:00
|
|
|
* @mr: memory region container for memory devices
|
|
|
|
* @as: address space for memory devices
|
|
|
|
* @listener: memory listener used to track used memslots in the address space
|
2023-03-09 10:52:57 +03:00
|
|
|
* @dimm_size: the sum of plugged DIMMs' sizes
|
2023-06-23 15:45:53 +03:00
|
|
|
* @used_region_size: the part of @mr already used by memory devices
|
2023-09-26 21:57:28 +03:00
|
|
|
* @required_memslots: the number of memslots required by memory devices
|
|
|
|
* @used_memslots: the number of memslots currently used by memory devices
|
memory-device,vhost: Support automatic decision on the number of memslots
We want to support memory devices that can automatically decide how many
memslots they will use. In the worst case, they have to use a single
memslot.
The target use cases are virtio-mem and the hyper-v balloon.
Let's calculate a reasonable limit such a memory device may use, and
instruct the device to make a decision based on that limit. Use a simple
heuristic that considers:
* A memslot soft-limit for all memory devices of 256; also, to not
consume too many memslots -- which could harm performance.
* Actually still free and unreserved memslots
* The percentage of the remaining device memory region that memory device
will occupy.
Further, while we properly check before plugging a memory device whether
there still is are free memslots, we have other memslot consumers (such as
boot memory, PCI BARs) that don't perform any checks and might dynamically
consume memslots without any prior reservation. So we might succeed in
plugging a memory device, but once we dynamically map a PCI BAR we would
be in trouble. Doing accounting / reservation / checks for all such
users is problematic (e.g., sometimes we might temporarily split boot
memory into two memslots, triggered by the BIOS).
We use the historic magic memslot number of 509 as orientation to when
supporting 256 memory devices -> memslots (leaving 253 for boot memory and
other devices) has been proven to work reliable. We'll fallback to
suggesting a single memslot if we don't have at least 509 total memslots.
Plugging vhost devices with less than 509 memslots available while we
have memory devices plugged that consume multiple memslots due to
automatic decisions can be problematic. Most configurations might just fail
due to "limit < used + reserved", however, it can also happen that these
memory devices would suddenly consume memslots that would actually be
required by other memslot consumers (boot, PCI BARs) later. Note that this
has always been sketchy with vhost devices that support only a small number
of memslots; but we don't want to make it any worse.So let's keep it simple
and simply reject plugging such vhost devices in such a configuration.
Eventually, all vhost devices that want to be fully compatible with such
memory devices should support a decent number of memslots (>= 509).
Message-ID: <20230926185738.277351-13-david@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2023-09-26 21:57:32 +03:00
|
|
|
* @memslot_auto_decision_active: whether any plugged memory device
|
|
|
|
* automatically decided to use more than
|
|
|
|
* one memslot
|
2018-04-23 19:51:17 +03:00
|
|
|
*/
|
2018-04-23 19:51:23 +03:00
|
|
|
typedef struct DeviceMemoryState {
|
2018-04-23 19:51:17 +03:00
|
|
|
hwaddr base;
|
|
|
|
MemoryRegion mr;
|
2023-09-26 21:57:28 +03:00
|
|
|
AddressSpace as;
|
|
|
|
MemoryListener listener;
|
2023-03-09 10:52:57 +03:00
|
|
|
uint64_t dimm_size;
|
2023-06-23 15:45:53 +03:00
|
|
|
uint64_t used_region_size;
|
2023-09-26 21:57:28 +03:00
|
|
|
unsigned int required_memslots;
|
|
|
|
unsigned int used_memslots;
|
memory-device,vhost: Support automatic decision on the number of memslots
We want to support memory devices that can automatically decide how many
memslots they will use. In the worst case, they have to use a single
memslot.
The target use cases are virtio-mem and the hyper-v balloon.
Let's calculate a reasonable limit such a memory device may use, and
instruct the device to make a decision based on that limit. Use a simple
heuristic that considers:
* A memslot soft-limit for all memory devices of 256; also, to not
consume too many memslots -- which could harm performance.
* Actually still free and unreserved memslots
* The percentage of the remaining device memory region that memory device
will occupy.
Further, while we properly check before plugging a memory device whether
there still is are free memslots, we have other memslot consumers (such as
boot memory, PCI BARs) that don't perform any checks and might dynamically
consume memslots without any prior reservation. So we might succeed in
plugging a memory device, but once we dynamically map a PCI BAR we would
be in trouble. Doing accounting / reservation / checks for all such
users is problematic (e.g., sometimes we might temporarily split boot
memory into two memslots, triggered by the BIOS).
We use the historic magic memslot number of 509 as orientation to when
supporting 256 memory devices -> memslots (leaving 253 for boot memory and
other devices) has been proven to work reliable. We'll fallback to
suggesting a single memslot if we don't have at least 509 total memslots.
Plugging vhost devices with less than 509 memslots available while we
have memory devices plugged that consume multiple memslots due to
automatic decisions can be problematic. Most configurations might just fail
due to "limit < used + reserved", however, it can also happen that these
memory devices would suddenly consume memslots that would actually be
required by other memslot consumers (boot, PCI BARs) later. Note that this
has always been sketchy with vhost devices that support only a small number
of memslots; but we don't want to make it any worse.So let's keep it simple
and simply reject plugging such vhost devices in such a configuration.
Eventually, all vhost devices that want to be fully compatible with such
memory devices should support a decent number of memslots (>= 509).
Message-ID: <20230926185738.277351-13-david@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2023-09-26 21:57:32 +03:00
|
|
|
unsigned int memslot_auto_decision_active;
|
2018-04-23 19:51:23 +03:00
|
|
|
} DeviceMemoryState;
|
2018-04-23 19:51:17 +03:00
|
|
|
|
2019-05-18 23:54:19 +03:00
|
|
|
/**
 * CpuTopology:
 * @cpus: the number of present logical processors on the machine
 * @drawers: the number of drawers on the machine
 * @books: the number of books in one drawer
 * @sockets: the number of sockets in one book
 * @dies: the number of dies in one socket
 * @clusters: the number of clusters in one die
 * @modules: the number of modules in one cluster
 * @cores: the number of cores in one module
 * @threads: the number of threads in one core
 * @max_cpus: the maximum number of logical processors on the machine
 *
 * The levels nest in the order
 * drawer/book/socket/die/cluster/module/core/thread.
 */
typedef struct CpuTopology {
    unsigned int cpus;
    unsigned int drawers;
    unsigned int books;
    unsigned int sockets;
    unsigned int dies;
    unsigned int clusters;
    unsigned int modules;
    unsigned int cores;
    unsigned int threads;
    unsigned int max_cpus;
} CpuTopology;
|
|
|
|
|
2014-03-05 21:30:45 +04:00
|
|
|
/**
|
|
|
|
* MachineState:
|
|
|
|
*/
|
|
|
|
struct MachineState {
|
|
|
|
/*< private >*/
|
|
|
|
Object parent_obj;
|
2014-07-01 18:14:41 +04:00
|
|
|
|
2014-03-05 21:30:45 +04:00
|
|
|
/*< public >*/
|
|
|
|
|
2021-03-03 20:36:36 +03:00
|
|
|
void *fdt;
|
2014-03-05 21:30:45 +04:00
|
|
|
char *dtb;
|
|
|
|
char *dumpdtb;
|
|
|
|
int phandle_start;
|
|
|
|
char *dt_compatible;
|
|
|
|
bool dump_guest_core;
|
|
|
|
bool mem_merge;
|
|
|
|
bool usb;
|
2015-03-23 20:05:28 +03:00
|
|
|
bool usb_disabled;
|
2014-03-05 21:30:45 +04:00
|
|
|
char *firmware;
|
2014-08-16 09:55:40 +04:00
|
|
|
bool iommu;
|
2015-02-23 15:56:42 +03:00
|
|
|
bool suppress_vmdesc;
|
2016-04-19 22:55:25 +03:00
|
|
|
bool enable_graphics;
|
2020-12-04 04:51:51 +03:00
|
|
|
ConfidentialGuestSupport *cgs;
|
2022-04-14 19:52:59 +03:00
|
|
|
HostMemoryBackend *memdev;
|
2020-02-19 19:08:38 +03:00
|
|
|
/*
|
|
|
|
* convenience alias to ram_memdev_id backend memory region
|
|
|
|
* or to numa container memory region
|
|
|
|
*/
|
|
|
|
MemoryRegion *ram;
|
2018-04-23 19:51:23 +03:00
|
|
|
DeviceMemoryState *device_memory;
|
2014-03-05 21:30:45 +04:00
|
|
|
|
2022-04-25 11:21:50 +03:00
|
|
|
/*
|
|
|
|
* Included in MachineState for simplicity, but not supported
|
|
|
|
* unless machine_add_audiodev_property is called. Boards
|
|
|
|
* that have embedded audio devices can call it from the
|
|
|
|
* machine init function and forward the property to the device.
|
|
|
|
*/
|
|
|
|
char *audiodev;
|
|
|
|
|
2014-05-07 18:42:57 +04:00
|
|
|
ram_addr_t ram_size;
|
2014-06-02 17:25:02 +04:00
|
|
|
ram_addr_t maxram_size;
|
|
|
|
uint64_t ram_slots;
|
2022-04-14 19:52:56 +03:00
|
|
|
BootConfiguration boot_config;
|
2014-05-26 16:40:58 +04:00
|
|
|
char *kernel_filename;
|
|
|
|
char *kernel_cmdline;
|
|
|
|
char *initrd_filename;
|
2017-09-13 19:04:55 +03:00
|
|
|
const char *cpu_type;
|
2014-09-27 00:45:31 +04:00
|
|
|
AccelState *accelerator;
|
2017-02-09 14:08:32 +03:00
|
|
|
CPUArchIdList *possible_cpus;
|
2019-05-18 23:54:19 +03:00
|
|
|
CpuTopology smp;
|
2019-03-08 21:20:53 +03:00
|
|
|
struct NVDIMMState *nvdimms_state;
|
2019-08-09 09:57:22 +03:00
|
|
|
struct NumaState *numa_state;
|
2014-03-05 21:30:45 +04:00
|
|
|
};
|
|
|
|
|
2024-06-20 19:57:29 +03:00
|
|
|
/*
|
|
|
|
* The macros which follow are intended to facilitate the
|
|
|
|
* definition of versioned machine types, using a somewhat
|
|
|
|
* similar pattern across targets.
|
|
|
|
*
|
|
|
|
* For example, a macro that can be used to define versioned
|
|
|
|
* 'virt' machine types would look like:
|
|
|
|
*
|
|
|
|
* #define DEFINE_VIRT_MACHINE_IMPL(latest, ...) \
|
|
|
|
* static void MACHINE_VER_SYM(class_init, virt, __VA_ARGS__)( \
|
|
|
|
* ObjectClass *oc, \
|
|
|
|
* void *data) \
|
|
|
|
* { \
|
|
|
|
* MachineClass *mc = MACHINE_CLASS(oc); \
|
|
|
|
* MACHINE_VER_SYM(options, virt, __VA_ARGS__)(mc); \
|
|
|
|
* mc->desc = "QEMU " MACHINE_VER_STR(__VA_ARGS__) " Virtual Machine"; \
|
2024-06-20 19:57:36 +03:00
|
|
|
* MACHINE_VER_DEPRECATION(__VA_ARGS__); \
|
2024-06-20 19:57:29 +03:00
|
|
|
* if (latest) { \
|
|
|
|
* mc->alias = "virt"; \
|
|
|
|
* } \
|
|
|
|
* } \
|
|
|
|
* static const TypeInfo MACHINE_VER_SYM(info, virt, __VA_ARGS__) = { \
|
|
|
|
* .name = MACHINE_VER_TYPE_NAME("virt", __VA_ARGS__), \
|
|
|
|
* .parent = TYPE_VIRT_MACHINE, \
|
|
|
|
* .class_init = MACHINE_VER_SYM(class_init, virt, __VA_ARGS__), \
|
|
|
|
* }; \
|
|
|
|
* static void MACHINE_VER_SYM(register, virt, __VA_ARGS__)(void) \
|
|
|
|
* { \
|
2024-06-20 19:57:36 +03:00
|
|
|
* MACHINE_VER_DELETION(__VA_ARGS__); \
|
2024-06-20 19:57:29 +03:00
|
|
|
* type_register_static(&MACHINE_VER_SYM(info, virt, __VA_ARGS__)); \
|
|
|
|
* } \
|
|
|
|
* type_init(MACHINE_VER_SYM(register, virt, __VA_ARGS__));
|
|
|
|
*
|
|
|
|
* Following this, one (or more) helpers can be added for
|
|
|
|
* whichever scenarios need to be catered for with a machine:
|
|
|
|
*
|
|
|
|
* // Normal 2 digit, marked as latest e.g. 'virt-9.0'
|
|
|
|
* #define DEFINE_VIRT_MACHINE_LATEST(major, minor) \
|
|
|
|
* DEFINE_VIRT_MACHINE_IMPL(true, major, minor)
|
|
|
|
*
|
|
|
|
* // Normal 2 digit e.g. 'virt-9.0'
|
|
|
|
* #define DEFINE_VIRT_MACHINE(major, minor) \
|
|
|
|
* DEFINE_VIRT_MACHINE_IMPL(false, major, minor)
|
|
|
|
*
|
|
|
|
* // Bugfix 3 digit e.g. 'virt-9.0.1'
|
|
|
|
* #define DEFINE_VIRT_MACHINE_BUGFIX(major, minor, micro) \
|
|
|
|
* DEFINE_VIRT_MACHINE_IMPL(false, major, minor, micro)
|
|
|
|
*
|
|
|
|
* // Tagged 2 digit e.g. 'virt-9.0-extra'
|
|
|
|
* #define DEFINE_VIRT_MACHINE_TAGGED(major, minor, tag) \
|
|
|
|
* DEFINE_VIRT_MACHINE_IMPL(false, major, minor, _, tag)
|
|
|
|
*
|
|
|
|
* // Tagged bugfix 3 digit e.g. 'virt-9.0.1-extra'
|
|
|
|
* #define DEFINE_VIRT_MACHINE_TAGGED_BUGFIX(major, minor, micro, tag) \
|
|
|
|
* DEFINE_VIRT_MACHINE_IMPL(false, major, minor, micro, _, tag)
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Helper for dispatching different macros based on how
|
|
|
|
* many __VA_ARGS__ are passed. Supports 1 to 5 variadic
|
|
|
|
* arguments, with the called target able to be prefixed
|
|
|
|
* with 0 or more fixed arguments too. To be called thus:
|
|
|
|
*
|
|
|
|
* _MACHINE_VER_PICK(__VA_ARGS__,
|
|
|
|
* MACRO_MATCHING_5_ARGS,
|
|
|
|
* MACRO_MATCHING_4_ARGS,
|
|
|
|
* MACRO_MATCHING_3_ARGS,
|
|
|
|
* MACRO_MATCHING_2_ARGS,
|
|
|
|
* MACRO_MATCHING_1_ARG) (FIXED-ARG-1,
|
|
|
|
* ...,
|
|
|
|
* FIXED-ARG-N,
|
|
|
|
* __VA_ARGS__)
|
|
|
|
*/
|
|
|
|
/* Expands to its sixth argument; every other argument is discarded. */
#define _MACHINE_VER_PICK(a1, a2, a3, a4, a5, chosen, ...) chosen
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Construct a human targeted machine version string.
|
|
|
|
*
|
|
|
|
* Can be invoked with various signatures
|
|
|
|
*
|
|
|
|
* MACHINE_VER_STR(major, minor)
* MACHINE_VER_STR(major, minor, micro)
* MACHINE_VER_STR(major, minor, _, tag)
* MACHINE_VER_STR(major, minor, micro, _, tag)
|
|
|
|
*
|
|
|
|
* Respectively emitting symbols with the format
|
|
|
|
*
|
|
|
|
* "{major}.{minor}"
|
|
|
|
* "{major}.{minor}.{micro}"
* "{major}.{minor}-{tag}"
|
|
|
|
* "{major}.{minor}.{micro}-{tag}"
|
|
|
|
*/
|
|
|
|
/* (major, minor) -> "{major}.{minor}" */
#define _MACHINE_VER_STR2(major, minor) \
    #major "." #minor

/* (major, minor, micro) -> "{major}.{minor}.{micro}" */
#define _MACHINE_VER_STR3(major, minor, micro) \
    #major "." #minor "." #micro

/*
 * (major, minor, _, tag) -> "{major}.{minor}-{tag}"
 * The third argument is a placeholder and is never expanded.
 */
#define _MACHINE_VER_STR4(major, minor, _unused_, tag) \
    #major "." #minor "-" #tag

/*
 * (major, minor, micro, _, tag) -> "{major}.{minor}.{micro}-{tag}"
 * The fourth argument is a placeholder and is never expanded.
 */
#define _MACHINE_VER_STR5(major, minor, micro, _unused_, tag) \
    #major "." #minor "." #micro "-" #tag

/*
 * Public entry point: selects one of the helpers above according to
 * the number of arguments supplied (2 to 5) and forwards them to it.
 */
#define MACHINE_VER_STR(...) \
    _MACHINE_VER_PICK(__VA_ARGS__, \
                      _MACHINE_VER_STR5, \
                      _MACHINE_VER_STR4, \
                      _MACHINE_VER_STR3, \
                      _MACHINE_VER_STR2) (__VA_ARGS__)
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Construct a QOM type name for a versioned machine
|
|
|
|
* type
|
|
|
|
*
|
|
|
|
* Can be invoked with various signatures
|
|
|
|
*
|
|
|
|
* MACHINE_VER_TYPE_NAME(prefix, major, minor)
|
|
|
|
* MACHINE_VER_TYPE_NAME(prefix, major, minor, micro)
|
|
|
|
* MACHINE_VER_TYPE_NAME(prefix, major, minor, _, tag)
|
|
|
|
* MACHINE_VER_TYPE_NAME(prefix, major, minor, micro, _, tag)
|
|
|
|
*
|
|
|
|
* Respectively emitting symbols with the format
|
|
|
|
*
|
|
|
|
* "{prefix}-{major}.{minor}-machine"
* "{prefix}-{major}.{minor}.{micro}-machine"
* "{prefix}-{major}.{minor}-{tag}-machine"
* "{prefix}-{major}.{minor}.{micro}-{tag}-machine"
|
|
|
|
*/
|
|
|
|
/*
 * Each helper concatenates string literals, so 'prefix' must itself be
 * a string literal. TYPE_MACHINE_SUFFIX is appended to every result.
 */

/* (prefix, major, minor) */
#define _MACHINE_VER_TYPE_NAME2(prefix, major, minor) \
    prefix "-" #major "." #minor TYPE_MACHINE_SUFFIX

/* (prefix, major, minor, micro) */
#define _MACHINE_VER_TYPE_NAME3(prefix, major, minor, micro) \
    prefix "-" #major "." #minor "." #micro TYPE_MACHINE_SUFFIX

/* (prefix, major, minor, _, tag) - the placeholder is never expanded */
#define _MACHINE_VER_TYPE_NAME4(prefix, major, minor, _unused_, tag) \
    prefix "-" #major "." #minor "-" #tag TYPE_MACHINE_SUFFIX

/* (prefix, major, minor, micro, _, tag) - the placeholder is never expanded */
#define _MACHINE_VER_TYPE_NAME5(prefix, major, minor, micro, _unused_, tag) \
    prefix "-" #major "." #minor "." #micro "-" #tag TYPE_MACHINE_SUFFIX

/*
 * Public entry point: selects one of the helpers above according to
 * the number of version arguments (2 to 5) following 'prefix'.
 */
#define MACHINE_VER_TYPE_NAME(prefix, ...) \
    _MACHINE_VER_PICK(__VA_ARGS__, \
                      _MACHINE_VER_TYPE_NAME5, \
                      _MACHINE_VER_TYPE_NAME4, \
                      _MACHINE_VER_TYPE_NAME3, \
                      _MACHINE_VER_TYPE_NAME2) (prefix, __VA_ARGS__)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Construct a name for a versioned machine type that is
|
|
|
|
* suitable for use as a C symbol (function/variable/etc).
|
|
|
|
*
|
|
|
|
* Can be invoked with various signatures
|
|
|
|
*
|
|
|
|
* MACHINE_VER_SYM(sym, prefix, major, minor)
|
|
|
|
* MACHINE_VER_SYM(sym, prefix, major, minor, micro)
|
|
|
|
* MACHINE_VER_SYM(sym, prefix, major, minor, _, tag)
|
|
|
|
* MACHINE_VER_SYM(sym, prefix, major, minor, micro, _, tag)
|
|
|
|
*
|
|
|
|
* Respectively emitting symbols with the format
|
|
|
|
*
|
|
|
|
* {prefix}_machine_{major}_{minor}_{sym}
|
|
|
|
* {prefix}_machine_{major}_{minor}_{micro}_{sym}
|
|
|
|
* {prefix}_machine_{major}_{minor}_{tag}_{sym}
|
|
|
|
* {prefix}_machine_{major}_{minor}_{micro}_{tag}_{sym}
|
|
|
|
*/
|
|
|
|
/*
 * Each helper token-pastes its arguments into a single identifier.
 * Because '##' is used, the arguments are pasted as written and are
 * not macro-expanded first.
 */

/* {prefix}_machine_{major}_{minor}_{sym} */
#define _MACHINE_VER_SYM2(sym, prefix, major, minor) \
    prefix ## _machine_ ## major ## _ ## minor ## _ ## sym

/* {prefix}_machine_{major}_{minor}_{micro}_{sym} */
#define _MACHINE_VER_SYM3(sym, prefix, major, minor, micro) \
    prefix ## _machine_ ## major ## _ ## minor ## _ ## micro ## _ ## sym

/* {prefix}_machine_{major}_{minor}_{tag}_{sym}; placeholder is dropped */
#define _MACHINE_VER_SYM4(sym, prefix, major, minor, _unused_, tag) \
    prefix ## _machine_ ## major ## _ ## minor ## _ ## tag ## _ ## sym

/* {prefix}_machine_{major}_{minor}_{micro}_{tag}_{sym}; placeholder is dropped */
#define _MACHINE_VER_SYM5(sym, prefix, major, minor, micro, _unused_, tag) \
    prefix ## _machine_ ## major ## _ ## minor ## _ ## micro ## _ ## tag ## _ ## sym

/*
 * Public entry point: selects one of the helpers above according to
 * the number of version arguments (2 to 5) following 'sym, prefix'.
 */
#define MACHINE_VER_SYM(sym, prefix, ...) \
    _MACHINE_VER_PICK(__VA_ARGS__, \
                      _MACHINE_VER_SYM5, \
                      _MACHINE_VER_SYM4, \
                      _MACHINE_VER_SYM3, \
                      _MACHINE_VER_SYM2) (sym, prefix, __VA_ARGS__)
|
|
|
|
|
|
|
|
|
2024-06-20 19:57:36 +03:00
|
|
|
/*
|
|
|
|
* How many years/major releases for each phase
|
|
|
|
* of the life cycle. Assumes use of versioning
|
|
|
|
* scheme where major is bumped each year
|
|
|
|
*/
|
|
|
|
/* Cutoff, in major releases, used by MACHINE_VER_SHOULD_DELETE() */
#define MACHINE_VER_DELETION_MAJOR 6
/* Cutoff, in major releases, used by MACHINE_VER_IS_DEPRECATED() */
#define MACHINE_VER_DEPRECATION_MAJOR 3
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Expands to a static string containing a deprecation
|
|
|
|
* message for a versioned machine type
|
|
|
|
*/
|
|
|
|
/*
 * Built by string-literal concatenation; the two thresholds are
 * embedded via stringify() so the text tracks the constants.
 */
#define MACHINE_VER_DEPRECATION_MSG \
    "machines more than " stringify(MACHINE_VER_DEPRECATION_MAJOR) \
    " years old are subject to deletion after " \
    stringify(MACHINE_VER_DELETION_MAJOR) " years"
|
|
|
|
|
|
|
|
/*
 * True when the (major, minor) machine version is at least 'cutoff'
 * major releases behind QEMU_VERSION_MAJOR.QEMU_VERSION_MINOR:
 *   - strictly more than 'cutoff' majors behind, or
 *   - exactly 'cutoff' majors behind and the current minor is not
 *     lower than the machine's minor.
 *
 * Arguments are parenthesised so that expression arguments
 * (e.g. 'base + 1') are subtracted/compared as a unit.
 */
#define _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor) \
    (((QEMU_VERSION_MAJOR - (major)) > (cutoff)) || \
     (((QEMU_VERSION_MAJOR - (major)) == (cutoff)) && \
      (QEMU_VERSION_MINOR - (minor)) >= 0))

/*
 * Per-arity adapters: only major and minor take part in the age
 * check; micro, the placeholder and the tag are ignored.
 */
#define _MACHINE_VER_IS_EXPIRED2(cutoff, major, minor) \
    _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
#define _MACHINE_VER_IS_EXPIRED3(cutoff, major, minor, micro) \
    _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
#define _MACHINE_VER_IS_EXPIRED4(cutoff, major, minor, _unused, tag) \
    _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)
#define _MACHINE_VER_IS_EXPIRED5(cutoff, major, minor, micro, _unused, tag) \
    _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor)

/*
 * Selects the adapter matching the number of version arguments
 * (2 to 5) that follow 'cutoff'.
 */
#define _MACHINE_IS_EXPIRED(cutoff, ...) \
    _MACHINE_VER_PICK(__VA_ARGS__, \
                      _MACHINE_VER_IS_EXPIRED5, \
                      _MACHINE_VER_IS_EXPIRED4, \
                      _MACHINE_VER_IS_EXPIRED3, \
                      _MACHINE_VER_IS_EXPIRED2) (cutoff, __VA_ARGS__)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Evaluates true when a machine type with (major, minor)
|
|
|
|
* or (major, minor, micro) version should be considered
|
|
|
|
* deprecated based on the current versioned machine type
|
|
|
|
* lifecycle rules
|
|
|
|
*/
|
|
|
|
/* Age check against the deprecation threshold. */
#define MACHINE_VER_IS_DEPRECATED(...) \
    _MACHINE_IS_EXPIRED(MACHINE_VER_DEPRECATION_MAJOR, __VA_ARGS__)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Evaluates true when a machine type with (major, minor)
|
|
|
|
* or (major, minor, micro) version should be considered
|
|
|
|
* for deletion based on the current versioned machine type
|
|
|
|
* lifecycle rules
|
|
|
|
*/
|
|
|
|
/* Age check against the deletion threshold. */
#define MACHINE_VER_SHOULD_DELETE(...) \
    _MACHINE_IS_EXPIRED(MACHINE_VER_DELETION_MAJOR, __VA_ARGS__)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sets the deprecation reason for a versioned machine based
|
|
|
|
* on its age
|
|
|
|
*
|
|
|
|
* This must be unconditionally used in the _class_init
|
|
|
|
* function for all machine types which support versioning.
|
|
|
|
*
|
|
|
|
* Initially it will effectively be a no-op, but after a
|
|
|
|
* suitable period of time has passed, it will set the
|
|
|
|
* 'deprecation_reason' field on the machine, to warn users
|
|
|
|
* about forthcoming removal.
|
|
|
|
*/
|
|
|
|
/*
 * NOTE: expands to a statement that assigns through a variable named
 * 'mc', which must be in scope at the point of use. When the version
 * is not yet deprecated, 'mc' is left untouched.
 */
#define MACHINE_VER_DEPRECATION(...) \
    do { \
        if (MACHINE_VER_IS_DEPRECATED(__VA_ARGS__)) { \
            mc->deprecation_reason = MACHINE_VER_DEPRECATION_MSG; \
        } \
    } while (0)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prevents registration of a versioned machine based on
|
|
|
|
* its age
|
|
|
|
*
|
|
|
|
* This must be unconditionally used in the register
|
|
|
|
* method for all machine types which support versioning.
|
|
|
|
*
|
|
|
|
* Initially it will effectively be a no-op, but after a
|
|
|
|
* suitable period of time has passed, it will cause
|
|
|
|
* execution of the method to return, avoiding registration
|
|
|
|
* of the machine
|
2024-06-20 19:57:37 +03:00
|
|
|
*
|
|
|
|
* The new deprecation and deletion policy for versioned
|
|
|
|
* machine types was introduced in QEMU 9.1.0.
|
|
|
|
*
|
|
|
|
* Under the new policy a number of old machine types (any
|
|
|
|
* prior to 2.12) would be liable for immediate deletion
|
|
|
|
* which would be a violation of our historical deprecation
|
|
|
|
* and removal policy
|
|
|
|
*
|
|
|
|
* Thus deletions are temporarily gated on existence of
|
|
|
|
* the env variable "QEMU_DELETE_MACHINES" / QEMU version
|
|
|
|
* number >= 10.1.0. This gate can be deleted in the 10.1.0
|
|
|
|
* dev cycle
|
2024-06-20 19:57:36 +03:00
|
|
|
*/
|
|
|
|
/*
 * NOTE: expands to a statement containing a bare 'return;', so it can
 * only be used inside a function returning void.
 *
 * The early return is taken only when the version is past the
 * deletion threshold AND either the QEMU_DELETE_MACHINES environment
 * variable is set (to any value) or the QEMU version is >= 10.1.
 */
#define MACHINE_VER_DELETION(...) \
    do { \
        if (MACHINE_VER_SHOULD_DELETE(__VA_ARGS__)) { \
            if (getenv("QEMU_DELETE_MACHINES") || \
                QEMU_VERSION_MAJOR > 10 || (QEMU_VERSION_MAJOR == 10 && \
                                            QEMU_VERSION_MINOR >= 1)) { \
                return; \
            } \
        } \
    } while (0)
|
|
|
|
|
2015-09-04 21:37:06 +03:00
|
|
|
/*
 * Generate the boilerplate needed to register a machine type.
 *
 * @namestr: string literal passed to MACHINE_TYPE_NAME() to form the
 *           type name
 * @machine_initfn: function called with the MachineClass during class
 *                  initialisation; its name is also used as the prefix
 *                  of the generated symbols
 *
 * Expands to three static definitions -
 *   <machine_initfn>_class_init, <machine_initfn>_typeinfo and
 *   <machine_initfn>_register_types - followed by a type_init()
 * invocation. The expansion deliberately has no trailing semicolon;
 * the user supplies it.
 */
#define DEFINE_MACHINE(namestr, machine_initfn) \
    static void machine_initfn##_class_init(ObjectClass *oc, void *data) \
    { \
        MachineClass *mc = MACHINE_CLASS(oc); \
        machine_initfn(mc); \
    } \
    static const TypeInfo machine_initfn##_typeinfo = { \
        .name       = MACHINE_TYPE_NAME(namestr), \
        .parent     = TYPE_MACHINE, \
        .class_init = machine_initfn##_class_init, \
    }; \
    static void machine_initfn##_register_types(void) \
    { \
        type_register_static(&machine_initfn##_typeinfo); \
    } \
    type_init(machine_initfn##_register_types)
|
2015-09-04 21:37:06 +03:00
|
|
|
|
2024-09-03 19:22:16 +03:00
|
|
|
/*
 * Per-version GlobalProperty tables, newest first. Each array is
 * declared together with a companion hw_compat_<ver>_len variable
 * (presumably its element count - confirm against the definitions).
 */
extern GlobalProperty hw_compat_9_1[];
extern const size_t hw_compat_9_1_len;

extern GlobalProperty hw_compat_9_0[];
extern const size_t hw_compat_9_0_len;

extern GlobalProperty hw_compat_8_2[];
extern const size_t hw_compat_8_2_len;

extern GlobalProperty hw_compat_8_1[];
extern const size_t hw_compat_8_1_len;

extern GlobalProperty hw_compat_8_0[];
extern const size_t hw_compat_8_0_len;

extern GlobalProperty hw_compat_7_2[];
extern const size_t hw_compat_7_2_len;

extern GlobalProperty hw_compat_7_1[];
extern const size_t hw_compat_7_1_len;

extern GlobalProperty hw_compat_7_0[];
extern const size_t hw_compat_7_0_len;

extern GlobalProperty hw_compat_6_2[];
extern const size_t hw_compat_6_2_len;

extern GlobalProperty hw_compat_6_1[];
extern const size_t hw_compat_6_1_len;

extern GlobalProperty hw_compat_6_0[];
extern const size_t hw_compat_6_0_len;

extern GlobalProperty hw_compat_5_2[];
extern const size_t hw_compat_5_2_len;

extern GlobalProperty hw_compat_5_1[];
extern const size_t hw_compat_5_1_len;

extern GlobalProperty hw_compat_5_0[];
extern const size_t hw_compat_5_0_len;

extern GlobalProperty hw_compat_4_2[];
extern const size_t hw_compat_4_2_len;

extern GlobalProperty hw_compat_4_1[];
extern const size_t hw_compat_4_1_len;

extern GlobalProperty hw_compat_4_0[];
extern const size_t hw_compat_4_0_len;

extern GlobalProperty hw_compat_3_1[];
extern const size_t hw_compat_3_1_len;

extern GlobalProperty hw_compat_3_0[];
extern const size_t hw_compat_3_0_len;

extern GlobalProperty hw_compat_2_12[];
extern const size_t hw_compat_2_12_len;

extern GlobalProperty hw_compat_2_11[];
extern const size_t hw_compat_2_11_len;

extern GlobalProperty hw_compat_2_10[];
extern const size_t hw_compat_2_10_len;

extern GlobalProperty hw_compat_2_9[];
extern const size_t hw_compat_2_9_len;

extern GlobalProperty hw_compat_2_8[];
extern const size_t hw_compat_2_8_len;

extern GlobalProperty hw_compat_2_7[];
extern const size_t hw_compat_2_7_len;

extern GlobalProperty hw_compat_2_6[];
extern const size_t hw_compat_2_6_len;

extern GlobalProperty hw_compat_2_5[];
extern const size_t hw_compat_2_5_len;

extern GlobalProperty hw_compat_2_4[];
extern const size_t hw_compat_2_4_len;

extern GlobalProperty hw_compat_2_3[];
extern const size_t hw_compat_2_3_len;

extern GlobalProperty hw_compat_2_2[];
extern const size_t hw_compat_2_2_len;

extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
|
|
|
|
|
2007-11-17 20:14:51 +03:00
|
|
|
#endif
|