qemu/include/hw/boards.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

525 lines
19 KiB
C
Raw Normal View History

/* Declarations for use by board files for creating devices. */
#ifndef HW_BOARDS_H
#define HW_BOARDS_H
#include "exec/memory.h"
#include "sysemu/hostmem.h"
#include "sysemu/blockdev.h"
#include "qapi/qapi-types-machine.h"
#include "qemu/module.h"
#include "qom/object.h"
#include "hw/core/cpu.h"
/* Suffix appended to a machine name to form its class (type) name. */
#define TYPE_MACHINE_SUFFIX "-machine"
/*
 * Machine class name that needs to be used for class-name-based machine
 * type lookup to work.
 *
 * The expansion places @machinename directly next to TYPE_MACHINE_SUFFIX and
 * relies on adjacent string literal concatenation, so @machinename must itself
 * be a string literal (or a macro expanding to one), not a char pointer.
 */
#define MACHINE_TYPE_NAME(machinename) (machinename TYPE_MACHINE_SUFFIX)
/* Type name string used for the machine type declared below. */
#define TYPE_MACHINE "machine"
/*
 * BSD defines MACHINE and QEMU does not use that definition; drop it so the
 * identifier can be passed to OBJECT_DECLARE_TYPE() below.
 */
#undef MACHINE
/*
 * Declares the MachineState/MachineClass pair under the MACHINE name.
 * NOTE(review): OBJECT_DECLARE_TYPE is not defined in this file; presumably it
 * comes from "qom/object.h" -- confirm there for what it expands to.
 */
OBJECT_DECLARE_TYPE(MachineState, MachineClass, MACHINE)
/* Declared here only; the definition lives outside this header. */
extern MachineState *current_machine;
/**
 * machine_class_default_cpu_type: Return the machine default CPU type.
 * @mc: Machine class
 *
 * Returns: the default CPU type for @mc, as a string.
 * NOTE(review): whether the result can be NULL is not visible from this
 * header -- confirm against the implementation.
 */
const char *machine_class_default_cpu_type(MachineClass *mc);
/*
 * Machine helper prototypes.
 *
 * Only the declarations are visible in this header; the notes below are
 * inferred from the names and signatures alone and must be confirmed against
 * the implementation before being relied upon.
 */

/* Presumably adds an audiodev property to machine class @mc -- TODO confirm. */
void machine_add_audiodev_property(MachineClass *mc);
/*
 * Presumably runs board initialisation for @machine; takes a memory path and
 * an Error ** out-parameter -- TODO confirm exact semantics of @mem_path.
 */
void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp);
/* Boolean/integer queries on @machine; meaning inferred from names only. */
bool machine_usb(MachineState *machine);
int machine_phandle_start(MachineState *machine);
bool machine_dump_guest_core(MachineState *machine);
bool machine_mem_merge(MachineState *machine);
bool machine_require_guest_memfd(MachineState *machine);
/*
 * Returns a HotpluggableCPUList pointer for @machine.
 * NOTE(review): ownership of the returned list is not visible here -- confirm.
 */
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
/*
 * Presumably assigns a NUMA node to the CPU(s) described by @props;
 * takes an Error ** out-parameter -- TODO confirm.
 */
void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props,
Error **errp);
/*
 * Presumably parses @config into the SMP state of @ms; takes an Error **
 * out-parameter -- TODO confirm.
 */
void machine_parse_smp_config(MachineState *ms,
const SMPConfiguration *config, Error **errp);
/* Topology queries on @ms; both take a const MachineState and return a count. */
unsigned int machine_topo_get_cores_per_socket(const MachineState *ms);
unsigned int machine_topo_get_threads_per_socket(const MachineState *ms);
/*
 * Presumably sets up the memory-device region of @ms at @base spanning @size
 * bytes -- TODO confirm units and alignment requirements.
 */
void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size);
/**
 * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices
 * @mc: Machine class
 * @type: type to allow (should be a subtype of TYPE_SYS_BUS_DEVICE)
 *
 * Add the QOM type @type to the list of devices which are subtypes
 * of TYPE_SYS_BUS_DEVICE but which are still permitted to be dynamically
 * created (e.g. by the user on the command line with -device).
 * By default, if the user tries to create any devices on the command line
 * that are subtypes of TYPE_SYS_BUS_DEVICE they will get an error message;
 * for the special cases which are permitted for this machine model, the
 * machine model class init code must call this function to add them
 * to the list of specifically permitted devices.
 */
void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type);
/**
 * device_type_is_dynamic_sysbus: Check if type is an allowed sysbus device
 * type for the machine class.
 * @mc: Machine class
 * @type: type to check (should be a subtype of TYPE_SYS_BUS_DEVICE)
 *
 * Returns: true if @type is a type in the machine's list of
 * dynamically pluggable sysbus devices; otherwise false.
 *
 * Check if the QOM type @type is in the list of allowed sysbus device
 * types (see machine_class_allow_dynamic_sysbus_dev()).
 * Note that if @type has a parent type in the list, it is allowed too.
 */
bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type);
/**
 * device_is_dynamic_sysbus: test whether device is a dynamic sysbus device
 * @mc: Machine class
 * @dev: device to check
 *
 * Returns: true if @dev is a sysbus device on the machine's list
 * of dynamically pluggable sysbus devices; otherwise false.
 *
 * This function checks whether @dev is a valid dynamic sysbus device,
 * by first confirming that it is a sysbus device and then checking it
 * against the list of permitted dynamic sysbus devices which has been
 * set up by the machine using machine_class_allow_dynamic_sysbus_dev().
 *
 * It is valid to call this with something that is not a subclass of
 * TYPE_SYS_BUS_DEVICE; the function will return false in this case.
 * This allows hotplug callback functions to be written as:
 *
 *     if (device_is_dynamic_sysbus(mc, dev)) {
 *         handle dynamic sysbus case;
 *     } else if (some other kind of hotplug) {
 *         handle that;
 *     }
 */
bool device_is_dynamic_sysbus(MachineClass *mc, DeviceState *dev);
/*
 * machine_consume_memdev:
 * @machine: machine on whose behalf the backend is consumed
 * @backend: host memory backend to take over
 *
 * Checks that @backend isn't already in use, prepares it for exclusive
 * usage and returns the migratable MemoryRegion provided by @backend.
 *
 * NOTE(review): what happens when the backend is already in use is not
 * visible from this header -- confirm against the implementation.
 */
MemoryRegion *machine_consume_memdev(MachineState *machine,
HostMemoryBackend *backend);
/**
 * CPUArchId:
 * @arch_id - architecture-dependent CPU ID of present or possible CPU
 * @vcpus_count - number of threads provided by @cpu object
 * @props - CPU object properties, initialized by board
 * @cpu - pointer to corresponding CPU object if it's present, or NULL
 *        otherwise
 * @type - QOM class name of possible @cpu object
 */
typedef struct CPUArchId {
uint64_t arch_id;
int64_t vcpus_count;
CpuInstanceProperties props;
CPUState *cpu;
const char *type;
} CPUArchId;
/**
* CPUArchIdList:
* @len - number of @CPUArchId items in @cpus array
* @cpus - array of present or possible CPUs for current machine configuration
*/
typedef struct {
int len;
misc: Replace zero-length arrays with flexible array member (manual) Description copied from Linux kernel commit from Gustavo A. R. Silva (see [3]): --v-- description start --v-- The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member [1], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being unadvertenly introduced [2] to the Linux codebase from now on. --^-- description end --^-- Do the similar housekeeping in the QEMU codebase (which uses C99 since commit 7be41675f7cb). All these instances of code were found with the help of the following command (then manual analysis, without modifying structures only having a single flexible array member, such QEDTable in block/qed.h): git grep -F '[0];' [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=76497732932f [3] https://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git/commit/?id=17642a2fbd2c1 Inspired-by: Gustavo A. R. Silva <gustavo@embeddedor.com> Reviewed-by: David Hildenbrand <david@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-03-04 18:38:16 +03:00
CPUArchId cpus[];
} CPUArchIdList;
/**
 * SMPCompatProps:
 * @prefer_sockets - whether sockets are preferred over cores in smp parsing
 * @dies_supported - whether dies are supported by the machine
 * @clusters_supported - whether clusters are supported by the machine
 * @has_clusters - whether clusters are explicitly specified in the user
 *                 provided SMP configuration
 * @books_supported - whether books are supported by the machine
 * @drawers_supported - whether drawers are supported by the machine
 * @modules_supported - whether modules are supported by the machine
 */
typedef struct {
    bool prefer_sockets;
    bool dies_supported;
    bool clusters_supported;
    bool has_clusters;
    bool books_supported;
    bool drawers_supported;
    bool modules_supported;
} SMPCompatProps;
/**
* MachineClass:
* @deprecation_reason: If set, the machine is marked as deprecated. The
* string should provide some clear information about what to use instead.
* @max_cpus: maximum number of CPUs supported. Default: 1
* @min_cpus: minimum number of CPUs supported. Default: 1
* @default_cpus: number of CPUs instantiated if none are specified. Default: 1
* @is_default:
* If true QEMU will use this machine by default if no '-M' option is given.
* @get_hotplug_handler: this function is called during bus-less
* device hotplug. If defined it returns pointer to an instance
* of HotplugHandler object, which handles hotplug operation
* for a given @dev. It may return NULL if @dev doesn't require
* any actions to be performed by hotplug handler.
* @cpu_index_to_instance_props:
* used to provide @cpu_index to socket/core/thread number mapping, allowing
* legacy code to perform mapping from cpu_index to topology properties
* Returns: tuple of socket/core/thread ids given cpu_index belongs to.
* used to provide @cpu_index to socket number mapping, allowing
* a machine to group CPU threads belonging to the same socket/package
* Returns: socket number given cpu_index belongs to.
* @hw_version:
* Value of QEMU_VERSION when the machine was added to QEMU.
* Set only by old machines because they need to keep
* compatibility on code that exposed QEMU_VERSION to guests in
* the past (and now use qemu_hw_version()).
* @possible_cpu_arch_ids:
* Returns an array of @CPUArchId architecture-dependent CPU IDs
* which includes CPU IDs for present and possible to hotplug CPUs.
* Caller is responsible for freeing returned list.
* @get_default_cpu_node_id:
* returns default board specific node_id value for CPU slot specified by
* index @idx in @ms->possible_cpus[]
* @has_hotpluggable_cpus:
* If true, board supports CPUs creation with -device/device_add.
vl.c: convert cpu_model to cpu type and set of global properties before machine_init() All machines that support user specified cpu_model either call cpu_generic_init() or cpu_class_by_name()/CPUClass::parse_features to parse feature string and to get CPU type to create. Which leads to code duplication and hard-codding default CPU model within machine_foo_init() code. Which makes it impossible to get CPU type before machine_init() is run. So instead of setting default CPUs models and doing parsing in target specific machine_foo_init() in various ways, provide a generic data driven cpu_model parsing before machine_init() is called. in follow up per target patches, it will allow to: * define default CPU type in consistent/generic manner per machine type and drop custom code that fallbacks to default if cpu_model is NULL * drop custom features parsing in targets and do it in centralized way. * for cases of cpu_generic_init(TYPE_BASE/DEFAULT_CPU, "some_cpu") replace it with cpu_create(machine->cpu_type) || cpu_create(TYPE_FOO) depending if CPU type is user settable or not. not doing useless parsing and clearly documenting where CPU model is user settable or fixed one. Patch allows machine subclasses to define default CPU type per machine class at class_init() time and if that is set generic code will parse cpu_model into a MachineState::cpu_type which will be used to create CPUs for that machine instance and allows gradual per board conversion. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <1505318697-77161-4-git-send-email-imammedo@redhat.com> Acked-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-09-13 19:04:55 +03:00
* @default_cpu_type:
* specifies default CPU_TYPE, which will be used for parsing target
* specific features and for creating CPUs if CPU name wasn't provided
* explicitly at CLI
* @minimum_page_bits:
* If non-zero, the board promises never to create a CPU with a page size
* smaller than this, so QEMU can use a more efficient larger page
* size than the target architecture's minimum. (Attempting to create
* such a CPU will fail.) Note that changing this is a migration
* compatibility break for the machine.
* @ignore_memory_transaction_failures:
* If this flag is true then the CPU will ignore memory transaction
* failures which should cause the CPU to take an exception due to an
* access to an unassigned physical address; the transaction will instead
* return zero (for a read) or be ignored (for a write). This should be
* set only by legacy board models which rely on the old RAZ/WI behaviour
* for handling devices that QEMU does not yet model. New board models
* should instead use "unimplemented-device" for all memory ranges where
* the guest will attempt to probe for a device that QEMU doesn't
* implement and a stub device is required.
* @kvm_type:
* Return the type of KVM corresponding to the kvm-type string option or
* computed based on other criteria such as the host kernel capabilities.
* kvm-type may be NULL if it is not needed.
* @numa_mem_supported:
* true if '--numa node.mem' option is supported and false otherwise
* @hotplug_allowed:
* If the hook is provided, then it'll be called for each device
* hotplug to check whether the device hotplug is allowed. Return
* true to grant allowance or false to reject the hotplug. When
* false is returned, an error must be set to show the reason of
* the rejection. If the hook is not provided, all hotplug will be
* allowed.
* @default_ram_id:
* Specifies initial RAM MemoryRegion name to be used for default backend
* creation if user explicitly hasn't specified backend with "memory-backend"
* property.
* It also will be used as a way to opt into "-m" option support.
* If it's not set by board, '-m' will be ignored and generic code will
* not create default RAM MemoryRegion.
vl/s390x: fixup ram sizes for compat machines Older QEMU versions did fixup the ram size to match what can be reported via sclp. We need to mimic this behaviour for machine types 4.2 and older to not fail on inbound migration for memory sizes that do not fit. Old machines with proper aligned memory sizes are not affected. Alignment table: VM size (<=) | Alignment -------------------------- 1020M | 1M 2040M | 2M 4080M | 4M 8160M | 8M 16320M | 16M 32640M | 32M 65280M | 64M 130560M | 128M 261120M | 256M 522240M | 512M 1044480M | 1G 2088960M | 2G 4177920M | 4G 8355840M | 8G Suggested action is to replace unaligned -m value with a suitable aligned one or if a change to a newer machine type is possible, use a machine version >= 5.0. A future version might remove the compatibility handling. For machine types >= 5.0 we can simply use an increment size of 1M and use the full range of increment number which allows for all possible memory sizes. The old limitation of having a maximum of 1020 increments was added for standby memory, which we no longer support. With that we can now support even weird memory sizes like 10001234 MB. As we no longer fixup maxram_size as well, make other users use ram_size instead. Keep using maxram_size when setting the maximum ram size in KVM, as that will come in handy in the future when supporting memory hotplug (in contrast, storage keys and storage attributes for hotplugged memory will have to be migrated per RAM block in the future). Fixes: 3a12fc61af5c ("390x/s390-virtio-ccw: use memdev for RAM") Reported-by: Lukáš Doktor <ldoktor@redhat.com> Signed-off-by: David Hildenbrand <david@redhat.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Reviewed-by: David Hildenbrand <david@redhat.com> Acked-by: Igor Mammedov <imammedo@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Dr. 
David Alan Gilbert <dgilbert@redhat.com> Message-Id: <20200401123754.109602-1-borntraeger@de.ibm.com> [CH: fixed up message on memory size fixup] Signed-off-by: Cornelia Huck <cohuck@redhat.com>
2020-04-01 15:37:54 +03:00
* @fixup_ram_size:
* Amends user provided ram size (with -m option) using machine
* specific algorithm. To be used by old machine types for compat
* purposes only.
* Applies only to default memory backend, i.e., explicit memory backend
* wasn't used.
*/
struct MachineClass {
/*< private >*/
ObjectClass parent_class;
/*< public >*/
well-defined listing order for machine types Commit 261747f1 ("vl: Use MachineClass instead of global QEMUMachine list") broke the ordering of the machine types in the user-visible output of qemu-system-XXXX -M \? This occurred because registration was rebased from a manually maintained linked list to GLib hash tables: qemu_register_machine() type_register() type_register_internal() type_table_add() g_hash_table_insert() and because the listing was rebased accordingly, from the traversal of the list to the traversal of the hash table (rendered as an ad-hoc list): machine_parse() object_class_get_list(TYPE_MACHINE) object_class_foreach() g_hash_table_foreach() The current order is a "random" one, for practical purposes, which is annoying for users. Introduce new members QEMUMachine.family and MachineClass.family, allowing machine types to be "clustered". Introduce a comparator function that establishes a total ordering between machine types, ordering machine types in the same family next to each other. In machine_parse(), list the supported machine types sorted with the comparator function. The comparator function: - sorts whole families before standalone machine types, - sorts whole families between each other in alphabetically increasing order, - sorts machine types inside the same family in alphabetically decreasing order, - sorts standalone machine types between each other in alphabetically increasing order. After this patch, all machine types are considered standalone, and accordingly, the output is alphabetically ascending. This will be refined in the following patches. 
Effects on the x86_64 output: Before: > Supported machines are: > pc-0.13 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-2.0 Standard PC (i440FX + PIIX, 1996) > pc-1.0 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-2.1 Standard PC (i440FX + PIIX, 1996) > pc-q35-1.7 Standard PC (Q35 + ICH9, 2009) > pc-1.1 Standard PC (i440FX + PIIX, 1996) > pc-0.14 Standard PC (i440FX + PIIX, 1996) > pc-q35-2.0 Standard PC (Q35 + ICH9, 2009) > pc-i440fx-1.4 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-1.5 Standard PC (i440FX + PIIX, 1996) > pc-0.15 Standard PC (i440FX + PIIX, 1996) > pc-q35-1.4 Standard PC (Q35 + ICH9, 2009) > isapc ISA-only PC > pc Standard PC (i440FX + PIIX, 1996) (alias of pc-i440fx-2.2) > pc-i440fx-2.2 Standard PC (i440FX + PIIX, 1996) (default) > pc-1.2 Standard PC (i440FX + PIIX, 1996) > pc-0.10 Standard PC (i440FX + PIIX, 1996) > pc-0.11 Standard PC (i440FX + PIIX, 1996) > pc-q35-2.1 Standard PC (Q35 + ICH9, 2009) > q35 Standard PC (Q35 + ICH9, 2009) (alias of pc-q35-2.2) > pc-q35-2.2 Standard PC (Q35 + ICH9, 2009) > pc-i440fx-1.6 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-1.7 Standard PC (i440FX + PIIX, 1996) > none empty machine > pc-q35-1.5 Standard PC (Q35 + ICH9, 2009) > pc-q35-1.6 Standard PC (Q35 + ICH9, 2009) > pc-0.12 Standard PC (i440FX + PIIX, 1996) > pc-1.3 Standard PC (i440FX + PIIX, 1996) After: > Supported machines are: > isapc ISA-only PC > none empty machine > pc-0.10 Standard PC (i440FX + PIIX, 1996) > pc-0.11 Standard PC (i440FX + PIIX, 1996) > pc-0.12 Standard PC (i440FX + PIIX, 1996) > pc-0.13 Standard PC (i440FX + PIIX, 1996) > pc-0.14 Standard PC (i440FX + PIIX, 1996) > pc-0.15 Standard PC (i440FX + PIIX, 1996) > pc-1.0 Standard PC (i440FX + PIIX, 1996) > pc-1.1 Standard PC (i440FX + PIIX, 1996) > pc-1.2 Standard PC (i440FX + PIIX, 1996) > pc-1.3 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-1.4 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-1.5 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-1.6 Standard PC (i440FX + PIIX, 1996) > 
pc-i440fx-1.7 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-2.0 Standard PC (i440FX + PIIX, 1996) > pc-i440fx-2.1 Standard PC (i440FX + PIIX, 1996) > pc Standard PC (i440FX + PIIX, 1996) (alias of pc-i440fx-2.2) > pc-i440fx-2.2 Standard PC (i440FX + PIIX, 1996) (default) > pc-q35-1.4 Standard PC (Q35 + ICH9, 2009) > pc-q35-1.5 Standard PC (Q35 + ICH9, 2009) > pc-q35-1.6 Standard PC (Q35 + ICH9, 2009) > pc-q35-1.7 Standard PC (Q35 + ICH9, 2009) > pc-q35-2.0 Standard PC (Q35 + ICH9, 2009) > pc-q35-2.1 Standard PC (Q35 + ICH9, 2009) > q35 Standard PC (Q35 + ICH9, 2009) (alias of pc-q35-2.2) > pc-q35-2.2 Standard PC (Q35 + ICH9, 2009) Effects on the aarch64 output: Before: > Supported machines are: > lm3s811evb Stellaris LM3S811EVB > canon-a1100 Canon PowerShot A1100 IS > vexpress-a15 ARM Versatile Express for Cortex-A15 > vexpress-a9 ARM Versatile Express for Cortex-A9 > xilinx-zynq-a9 Xilinx Zynq Platform Baseboard for Cortex-A9 > connex Gumstix Connex (PXA255) > n800 Nokia N800 tablet aka. RX-34 (OMAP2420) > lm3s6965evb Stellaris LM3S6965EVB > versatileab ARM Versatile/AB (ARM926EJ-S) > borzoi Borzoi PDA (PXA270) > tosa Tosa PDA (PXA255) > cheetah Palm Tungsten|E aka. Cheetah PDA (OMAP310) > midway Calxeda Midway (ECX-2000) > mainstone Mainstone II (PXA27x) > n810 Nokia N810 tablet aka. 
RX-44 (OMAP2420) > terrier Terrier PDA (PXA270) > highbank Calxeda Highbank (ECX-1000) > cubieboard cubietech cubieboard > sx1-v1 Siemens SX1 (OMAP310) V1 > sx1 Siemens SX1 (OMAP310) V2 > realview-eb-mpcore ARM RealView Emulation Baseboard (ARM11MPCore) > kzm ARM KZM Emulation Baseboard (ARM1136) > akita Akita PDA (PXA270) > z2 Zipit Z2 (PXA27x) > musicpal Marvell 88w8618 / MusicPal (ARM926EJ-S) > realview-pb-a8 ARM RealView Platform Baseboard for Cortex-A8 > versatilepb ARM Versatile/PB (ARM926EJ-S) > realview-eb ARM RealView Emulation Baseboard (ARM926EJ-S) > realview-pbx-a9 ARM RealView Platform Baseboard Explore for Cortex-A9 > spitz Spitz PDA (PXA270) > none empty machine > virt ARM Virtual Machine > collie Collie PDA (SA-1110) > smdkc210 Samsung SMDKC210 board (Exynos4210) > verdex Gumstix Verdex (PXA270) > nuri Samsung NURI board (Exynos4210) > integratorcp ARM Integrator/CP (ARM926EJ-S) After: > Supported machines are: > akita Akita PDA (PXA270) > borzoi Borzoi PDA (PXA270) > canon-a1100 Canon PowerShot A1100 IS > cheetah Palm Tungsten|E aka. Cheetah PDA (OMAP310) > collie Collie PDA (SA-1110) > connex Gumstix Connex (PXA255) > cubieboard cubietech cubieboard > highbank Calxeda Highbank (ECX-1000) > integratorcp ARM Integrator/CP (ARM926EJ-S) > kzm ARM KZM Emulation Baseboard (ARM1136) > lm3s6965evb Stellaris LM3S6965EVB > lm3s811evb Stellaris LM3S811EVB > mainstone Mainstone II (PXA27x) > midway Calxeda Midway (ECX-2000) > musicpal Marvell 88w8618 / MusicPal (ARM926EJ-S) > n800 Nokia N800 tablet aka. RX-34 (OMAP2420) > n810 Nokia N810 tablet aka. 
RX-44 (OMAP2420) > none empty machine > nuri Samsung NURI board (Exynos4210) > realview-eb ARM RealView Emulation Baseboard (ARM926EJ-S) > realview-eb-mpcore ARM RealView Emulation Baseboard (ARM11MPCore) > realview-pb-a8 ARM RealView Platform Baseboard for Cortex-A8 > realview-pbx-a9 ARM RealView Platform Baseboard Explore for Cortex-A9 > smdkc210 Samsung SMDKC210 board (Exynos4210) > spitz Spitz PDA (PXA270) > sx1 Siemens SX1 (OMAP310) V2 > sx1-v1 Siemens SX1 (OMAP310) V1 > terrier Terrier PDA (PXA270) > tosa Tosa PDA (PXA255) > verdex Gumstix Verdex (PXA270) > versatileab ARM Versatile/AB (ARM926EJ-S) > versatilepb ARM Versatile/PB (ARM926EJ-S) > vexpress-a15 ARM Versatile Express for Cortex-A15 > vexpress-a9 ARM Versatile Express for Cortex-A9 > virt ARM Virtual Machine > xilinx-zynq-a9 Xilinx Zynq Platform Baseboard for Cortex-A9 > z2 Zipit Z2 (PXA27x) RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1145042 Signed-off-by: Laszlo Ersek <lersek@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel.a@redhat.com> Acked-by: David Gibson <david@gibson.dropbear.id.au>
2014-09-23 00:38:35 +04:00
const char *family; /* NULL iff @name identifies a standalone machtype */
char *name;
const char *alias;
const char *desc;
const char *deprecation_reason;
void (*init)(MachineState *state);
void (*reset)(MachineState *state, ShutdownCause reason);
void (*wakeup)(MachineState *state);
int (*kvm_type)(MachineState *machine, const char *arg);
BlockInterfaceType block_default_type;
pc/vl: Add units-per-default-bus property This patch adds the 'units_per_default_bus' property which allows individual boards to declare their desired index => (bus,unit) mapping for their default HBA, so that boards such as Q35 can specify that its default if_ide HBA, AHCI, only accepts one unit per bus. This property only overrides the mapping for drives matching the block_default_type interface. This patch also adds this property to *all* past and present Q35 machine types. This retroactive addition is justified because the previous erroneous index=>(bus,unit) mappings caused by lack of such a property were not utilized due to lack of initialization code in the Q35 init routine. Further, semantically, the Q35 board type has always had the property that its default HBA, AHCI, only accepts one unit per bus. The new code added to add devices to drives relies upon the accuracy of this mapping. Thus, the property is applied retroactively to reduce complexity of allowing IDE HBAs with different units per bus. Examples: Prior to this patch, all IDE HBAs were assumed to use 2 units per bus (Master, Slave). When using Q35 and AHCI, however, we only allow one unit per bus. -hdb foo.qcow2 would become index=1, or bus=0,unit=1. -hdd foo.qcow2 would become index=3, or bus=1,unit=1. -drive file=foo.qcow2,index=5 becomes bus=2,unit=1. These are invalid for AHCI. They now become, under Q35 only: -hdb foo.qcow2 --> index=1, bus=1, unit=0. -hdd foo.qcow2 --> index=3, bus=3, unit=0. -drive file=foo.qcow2,index=5 --> bus=5,unit=0. The mapping is adjusted based on the fact that the default IF for the Q35 machine type is IF_IDE, and units-per-default-bus overrides the IDE mapping from its default of 2 units per bus to just 1 unit per bus. Signed-off-by: John Snow <jsnow@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Michael S. 
Tsirkin <mst@redhat.com> Message-id: 1412187569-23452-4-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2014-10-01 22:19:26 +04:00
int units_per_default_bus;
int max_cpus;
int min_cpus;
int default_cpus;
unsigned int no_serial:1,
no_parallel:1,
no_floppy:1,
no_cdrom:1,
no_sdcard:1,
pci_allow_0_address:1,
legacy_fw_cfg_order:1;
bool is_default;
const char *default_machine_opts;
const char *default_boot_order;
const char *default_display;
const char *default_nic;
GPtrArray *compat_props;
const char *hw_version;
ram_addr_t default_ram_size;
vl.c: convert cpu_model to cpu type and set of global properties before machine_init() All machines that support user specified cpu_model either call cpu_generic_init() or cpu_class_by_name()/CPUClass::parse_features to parse feature string and to get CPU type to create. Which leads to code duplication and hard-codding default CPU model within machine_foo_init() code. Which makes it impossible to get CPU type before machine_init() is run. So instead of setting default CPUs models and doing parsing in target specific machine_foo_init() in various ways, provide a generic data driven cpu_model parsing before machine_init() is called. in follow up per target patches, it will allow to: * define default CPU type in consistent/generic manner per machine type and drop custom code that fallbacks to default if cpu_model is NULL * drop custom features parsing in targets and do it in centralized way. * for cases of cpu_generic_init(TYPE_BASE/DEFAULT_CPU, "some_cpu") replace it with cpu_create(machine->cpu_type) || cpu_create(TYPE_FOO) depending if CPU type is user settable or not. not doing useless parsing and clearly documenting where CPU model is user settable or fixed one. Patch allows machine subclasses to define default CPU type per machine class at class_init() time and if that is set generic code will parse cpu_model into a MachineState::cpu_type which will be used to create CPUs for that machine instance and allows gradual per board conversion. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <1505318697-77161-4-git-send-email-imammedo@redhat.com> Acked-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-09-13 19:04:55 +03:00
const char *default_cpu_type;
bool default_kernel_irqchip_split;
bool option_rom_has_mr;
bool rom_file_has_mr;
int minimum_page_bits;
bool has_hotpluggable_cpus;
bool ignore_memory_transaction_failures;
int numa_mem_align_shift;
const char * const *valid_cpu_types;
strList *allowed_dynamic_sysbus_devices;
NUMA: Enable adding NUMA node implicitly Linux and Windows need ACPI SRAT table to make memory hotplug work properly, however currently QEMU doesn't create SRAT table if numa options aren't present on CLI. Which breaks both linux and windows guests in certain conditions: * Windows: won't enable memory hotplug without SRAT table at all * Linux: if QEMU is started with initial memory all below 4Gb and no SRAT table present, guest kernel will use nommu DMA ops, which breaks 32bit hw drivers when memory is hotplugged and guest tries to use it with that drivers. Fix above issues by automatically creating a numa node when QEMU is started with memory hotplug enabled but without '-numa' options on CLI. (PS: auto-create numa node only for new machine types so not to break migration). Which would provide SRAT table to guests without explicit -numa options on CLI and would allow: * Windows: to enable memory hotplug * Linux: switch to SWIOTLB DMA ops, to bounce DMA transfers to 32bit allocated buffers that legacy drivers/hw can handle. [Rewritten by Igor] Reported-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> Suggested-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Richard Henderson <rth@twiddle.net> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Thomas Huth <thuth@redhat.com> Cc: Alistair Francis <alistair23@gmail.com> Cc: Takao Indoh <indou.takao@jp.fujitsu.com> Cc: Izumi Taku <izumi.taku@jp.fujitsu.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2017-11-14 05:34:01 +03:00
bool auto_enable_numa_with_memhp;
bool auto_enable_numa_with_memdev;
bool ignore_boot_device_suffixes;
bool smbus_no_migration_support;
bool nvdimm_supported;
bool numa_mem_supported;
bool auto_enable_numa;
numa: Validate cluster and NUMA node boundary if required For some architectures like ARM64, multiple CPUs in one cluster can be associated with different NUMA nodes, which is irregular configuration because we shouldn't have this in baremetal environment. The irregular configuration causes Linux guest to misbehave, as the following warning messages indicate. -smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \ -numa node,nodeid=0,cpus=0-1,memdev=ram0 \ -numa node,nodeid=1,cpus=2-3,memdev=ram1 \ -numa node,nodeid=2,cpus=4-5,memdev=ram2 \ ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1 pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : build_sched_domains+0x284/0x910 lr : build_sched_domains+0x184/0x910 sp : ffff80000804bd50 x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000 x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840 x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508 x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014 x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0 x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041 x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001 x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002 x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001 Call trace: build_sched_domains+0x284/0x910 sched_init_domains+0xac/0xe0 sched_init_smp+0x48/0xc8 kernel_init_freeable+0x140/0x1ac kernel_init+0x28/0x140 ret_from_fork+0x10/0x20 Improve the situation to warn when multiple CPUs in one cluster have been associated with different NUMA nodes. However, one NUMA node is allowed to be associated with different clusters. 
Signed-off-by: Gavin Shan <gshan@redhat.com> Acked-by: Philippe Mathieu-Daudé <philmd@linaro.org> Acked-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <20230509002739.18388-2-gshan@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2023-05-09 03:27:37 +03:00
bool cpu_cluster_has_numa_boundary;
SMPCompatProps smp_props;
const char *default_ram_id;
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
DeviceState *dev);
bool (*hotplug_allowed)(MachineState *state, DeviceState *dev,
Error **errp);
CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
unsigned cpu_index);
const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx);
vl/s390x: fixup ram sizes for compat machines Older QEMU versions did fixup the ram size to match what can be reported via sclp. We need to mimic this behaviour for machine types 4.2 and older to not fail on inbound migration for memory sizes that do not fit. Old machines with proper aligned memory sizes are not affected. Alignment table: VM size (<=) | Alignment -------------------------- 1020M | 1M 2040M | 2M 4080M | 4M 8160M | 8M 16320M | 16M 32640M | 32M 65280M | 64M 130560M | 128M 261120M | 256M 522240M | 512M 1044480M | 1G 2088960M | 2G 4177920M | 4G 8355840M | 8G Suggested action is to replace unaligned -m value with a suitable aligned one or if a change to a newer machine type is possible, use a machine version >= 5.0. A future version might remove the compatibility handling. For machine types >= 5.0 we can simply use an increment size of 1M and use the full range of increment number which allows for all possible memory sizes. The old limitation of having a maximum of 1020 increments was added for standby memory, which we no longer support. With that we can now support even weird memory sizes like 10001234 MB. As we no longer fixup maxram_size as well, make other users use ram_size instead. Keep using maxram_size when setting the maximum ram size in KVM, as that will come in handy in the future when supporting memory hotplug (in contrast, storage keys and storage attributes for hotplugged memory will have to be migrated per RAM block in the future). Fixes: 3a12fc61af5c ("390x/s390-virtio-ccw: use memdev for RAM") Reported-by: Lukáš Doktor <ldoktor@redhat.com> Signed-off-by: David Hildenbrand <david@redhat.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Reviewed-by: David Hildenbrand <david@redhat.com> Acked-by: Igor Mammedov <imammedo@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Dr. 
David Alan Gilbert <dgilbert@redhat.com> Message-Id: <20200401123754.109602-1-borntraeger@de.ibm.com> [CH: fixed up message on memory size fixup] Signed-off-by: Cornelia Huck <cohuck@redhat.com>
2020-04-01 15:37:54 +03:00
ram_addr_t (*fixup_ram_size)(ram_addr_t size);
};
/**
 * DeviceMemoryState:
 * @base: address in guest physical address space where the memory
 *        address space for memory devices starts
 * @mr: memory region container for memory devices
 * @as: address space for memory devices
 * @listener: memory listener used to track used memslots in the address space
 * @dimm_size: the sum of plugged DIMMs' sizes
 * @used_region_size: the part of @mr already used by memory devices
 * @required_memslots: the number of memslots required by memory devices
 * @used_memslots: the number of memslots currently used by memory devices
 * @memslot_auto_decision_active: whether any plugged memory device
 *                                automatically decided to use more than
 *                                one memslot
 *
 * Bookkeeping for the guest-physical region that hosts memory devices.
 *
 * Some memory devices (the change that introduced
 * @memslot_auto_decision_active names virtio-mem and the hyper-v balloon
 * as target use cases) may decide on their own how many memslots to use;
 * the memslot fields above exist so that such decisions can be accounted
 * for.
 *
 * NOTE(review): @memslot_auto_decision_active is declared as an unsigned
 * int although it is described as a yes/no condition; presumably it
 * counts the devices that made such a decision -- confirm against its
 * users before relying on either reading.
 */
typedef struct DeviceMemoryState {
    hwaddr base;
    MemoryRegion mr;
    AddressSpace as;
    MemoryListener listener;
    uint64_t dimm_size;
    uint64_t used_region_size;
    unsigned int required_memslots;
    unsigned int used_memslots;
    unsigned int memslot_auto_decision_active;
} DeviceMemoryState;
/**
 * CpuTopology:
 * @cpus: the number of present logical processors on the machine
 * @drawers: the number of drawers on the machine
 * @books: the number of books in one drawer
 * @sockets: the number of sockets in one book
 * @dies: the number of dies in one socket
 * @clusters: the number of clusters in one die
 * @modules: the number of modules in one cluster
 * @cores: the number of cores in one module
 * @threads: the number of threads in one core
 * @max_cpus: the maximum number of logical processors on the machine
 *
 * Describes the CPU topology of a machine, from the outermost level
 * to the innermost one:
 *
 *   drawer / book / socket / die / cluster / module / core / thread
 *
 * Each per-level field holds the number of entities of that level
 * contained in one entity of the level directly above it.
 */
typedef struct CpuTopology {
    unsigned int cpus;
    unsigned int drawers;
    unsigned int books;
    unsigned int sockets;
    unsigned int dies;
    unsigned int clusters;
    unsigned int modules;
    unsigned int cores;
    unsigned int threads;
    unsigned int max_cpus;
} CpuTopology;
/**
* MachineState:
*/
struct MachineState {
/*< private >*/
Object parent_obj;
/*< public >*/
void *fdt;
char *dtb;
char *dumpdtb;
int phandle_start;
char *dt_compatible;
bool dump_guest_core;
bool mem_merge;
bool require_guest_memfd;
bool usb;
bool usb_disabled;
char *firmware;
bool iommu;
bool suppress_vmdesc;
bool enable_graphics;
ConfidentialGuestSupport *cgs;
HostMemoryBackend *memdev;
/*
* convenience alias to ram_memdev_id backend memory region
* or to numa container memory region
*/
MemoryRegion *ram;
DeviceMemoryState *device_memory;
/*
* Included in MachineState for simplicity, but not supported
* unless machine_add_audiodev_property is called. Boards
* that have embedded audio devices can call it from the
* machine init function and forward the property to the device.
*/
char *audiodev;
ram_addr_t ram_size;
ram_addr_t maxram_size;
uint64_t ram_slots;
BootConfiguration boot_config;
char *kernel_filename;
char *kernel_cmdline;
char *initrd_filename;
vl.c: convert cpu_model to cpu type and set of global properties before machine_init() All machines that support user specified cpu_model either call cpu_generic_init() or cpu_class_by_name()/CPUClass::parse_features to parse feature string and to get CPU type to create. Which leads to code duplication and hard-codding default CPU model within machine_foo_init() code. Which makes it impossible to get CPU type before machine_init() is run. So instead of setting default CPUs models and doing parsing in target specific machine_foo_init() in various ways, provide a generic data driven cpu_model parsing before machine_init() is called. in follow up per target patches, it will allow to: * define default CPU type in consistent/generic manner per machine type and drop custom code that fallbacks to default if cpu_model is NULL * drop custom features parsing in targets and do it in centralized way. * for cases of cpu_generic_init(TYPE_BASE/DEFAULT_CPU, "some_cpu") replace it with cpu_create(machine->cpu_type) || cpu_create(TYPE_FOO) depending if CPU type is user settable or not. not doing useless parsing and clearly documenting where CPU model is user settable or fixed one. Patch allows machine subclasses to define default CPU type per machine class at class_init() time and if that is set generic code will parse cpu_model into a MachineState::cpu_type which will be used to create CPUs for that machine instance and allows gradual per board conversion. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Message-Id: <1505318697-77161-4-git-send-email-imammedo@redhat.com> Acked-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-09-13 19:04:55 +03:00
const char *cpu_type;
AccelState *accelerator;
CPUArchIdList *possible_cpus;
CpuTopology smp;
struct NVDIMMState *nvdimms_state;
struct NumaState *numa_state;
};
/*
 * DEFINE_MACHINE:
 * @namestr: machine name as a string literal; MACHINE_TYPE_NAME() appends
 *           TYPE_MACHINE_SUFFIX to it to form the registered type name
 * @machine_initfn: function invoked with the MachineClass being
 *                  initialised; its identifier is also used as the prefix
 *                  of the static symbols generated below
 *
 * Generates, for one machine type whose parent is TYPE_MACHINE:
 *   - <machine_initfn>_class_init(): casts the class with MACHINE_CLASS()
 *     and hands it to @machine_initfn
 *   - <machine_initfn>_typeinfo: the TypeInfo naming the type and wiring
 *     up the class_init hook
 *   - <machine_initfn>_register_types(): registers that TypeInfo with
 *     type_register_static(), and is itself passed to type_init()
 *
 * The expansion ends without a semicolon.
 */
#define DEFINE_MACHINE(namestr, machine_initfn) \
    static void machine_initfn##_class_init(ObjectClass *klass, void *opaque) \
    { \
        machine_initfn(MACHINE_CLASS(klass)); \
    } \
    static const TypeInfo machine_initfn##_typeinfo = { \
        .class_init = machine_initfn##_class_init, \
        .parent     = TYPE_MACHINE, \
        .name       = MACHINE_TYPE_NAME(namestr), \
    }; \
    static void machine_initfn##_register_types(void) \
    { \
        type_register_static(&machine_initfn##_typeinfo); \
    } \
    type_init(machine_initfn##_register_types)
/*
 * Per-version compatibility tables.
 *
 * For every listed version <major>_<minor>, from 9.0 down to 2.1, this
 * header declares a GlobalProperty array hw_compat_<major>_<minor> together
 * with a companion hw_compat_<major>_<minor>_len of type size_t.  Only the
 * declarations are visible here.
 *
 * NOTE(review): presumably each _len holds the number of entries in the
 * matching array, and the tables are the properties that versioned machine
 * types apply to stay compatible with that release (cf. the compat_props
 * member of MachineClass) -- confirm against the definitions.
 */
extern GlobalProperty hw_compat_9_0[];
extern const size_t hw_compat_9_0_len;
extern GlobalProperty hw_compat_8_2[];
extern const size_t hw_compat_8_2_len;
extern GlobalProperty hw_compat_8_1[];
extern const size_t hw_compat_8_1_len;
extern GlobalProperty hw_compat_8_0[];
extern const size_t hw_compat_8_0_len;
extern GlobalProperty hw_compat_7_2[];
extern const size_t hw_compat_7_2_len;
extern GlobalProperty hw_compat_7_1[];
extern const size_t hw_compat_7_1_len;
extern GlobalProperty hw_compat_7_0[];
extern const size_t hw_compat_7_0_len;
extern GlobalProperty hw_compat_6_2[];
extern const size_t hw_compat_6_2_len;
extern GlobalProperty hw_compat_6_1[];
extern const size_t hw_compat_6_1_len;
extern GlobalProperty hw_compat_6_0[];
extern const size_t hw_compat_6_0_len;
extern GlobalProperty hw_compat_5_2[];
extern const size_t hw_compat_5_2_len;
extern GlobalProperty hw_compat_5_1[];
extern const size_t hw_compat_5_1_len;
extern GlobalProperty hw_compat_5_0[];
extern const size_t hw_compat_5_0_len;
extern GlobalProperty hw_compat_4_2[];
extern const size_t hw_compat_4_2_len;
extern GlobalProperty hw_compat_4_1[];
extern const size_t hw_compat_4_1_len;
extern GlobalProperty hw_compat_4_0[];
extern const size_t hw_compat_4_0_len;
extern GlobalProperty hw_compat_3_1[];
extern const size_t hw_compat_3_1_len;
extern GlobalProperty hw_compat_3_0[];
extern const size_t hw_compat_3_0_len;
extern GlobalProperty hw_compat_2_12[];
extern const size_t hw_compat_2_12_len;
extern GlobalProperty hw_compat_2_11[];
extern const size_t hw_compat_2_11_len;
extern GlobalProperty hw_compat_2_10[];
extern const size_t hw_compat_2_10_len;
extern GlobalProperty hw_compat_2_9[];
extern const size_t hw_compat_2_9_len;
extern GlobalProperty hw_compat_2_8[];
extern const size_t hw_compat_2_8_len;
extern GlobalProperty hw_compat_2_7[];
extern const size_t hw_compat_2_7_len;
extern GlobalProperty hw_compat_2_6[];
extern const size_t hw_compat_2_6_len;
extern GlobalProperty hw_compat_2_5[];
extern const size_t hw_compat_2_5_len;
extern GlobalProperty hw_compat_2_4[];
extern const size_t hw_compat_2_4_len;
extern GlobalProperty hw_compat_2_3[];
extern const size_t hw_compat_2_3_len;
extern GlobalProperty hw_compat_2_2[];
extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
#endif