qemu/hw/core/machine-smp.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

399 lines
14 KiB
C
Raw Permalink Normal View History

/*
* QEMU Machine core (related to -smp parsing)
*
* Copyright (c) 2021 Huawei Technologies Co., Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
/*
* Report information of a machine's supported CPU topology hierarchy.
* Topology members will be ordered from the largest to the smallest
* in the string.
*/
static char *cpu_hierarchy_to_string(MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
GString *s = g_string_new(NULL);
if (mc->smp_props.drawers_supported) {
g_string_append_printf(s, "drawers (%u) * ", ms->smp.drawers);
}
if (mc->smp_props.books_supported) {
g_string_append_printf(s, "books (%u) * ", ms->smp.books);
}
g_string_append_printf(s, "sockets (%u)", ms->smp.sockets);
if (mc->smp_props.dies_supported) {
g_string_append_printf(s, " * dies (%u)", ms->smp.dies);
}
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 12:22:09 +03:00
if (mc->smp_props.clusters_supported) {
g_string_append_printf(s, " * clusters (%u)", ms->smp.clusters);
}
if (mc->smp_props.modules_supported) {
g_string_append_printf(s, " * modules (%u)", ms->smp.modules);
}
g_string_append_printf(s, " * cores (%u)", ms->smp.cores);
g_string_append_printf(s, " * threads (%u)", ms->smp.threads);
return g_string_free(s, false);
}
/*
* machine_parse_smp_config: Generic function used to parse the given
* SMP configuration
*
* Any missing parameter in "cpus/maxcpus/sockets/cores/threads" will be
* automatically computed based on the provided ones.
*
* In the calculation of omitted sockets/cores/threads: we prefer sockets
* over cores over threads before 6.2, while preferring cores over sockets
* over threads since 6.2.
*
* In the calculation of cpus/maxcpus: When both maxcpus and cpus are omitted,
* maxcpus will be computed from the given parameters and cpus will be set
* equal to maxcpus. When only one of maxcpus and cpus is given then the
* omitted one will be set to its given counterpart's value. Both maxcpus and
* cpus may be specified, but maxcpus must be equal to or greater than cpus.
*
* For compatibility, apart from the parameters that will be computed, newly
* introduced topology members which are likely to be target specific should
* be directly set as 1 if they are omitted (e.g. dies for PC since 4.1).
*/
void machine_parse_smp_config(MachineState *ms,
const SMPConfiguration *config, Error **errp)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
unsigned cpus = config->has_cpus ? config->cpus : 0;
unsigned drawers = config->has_drawers ? config->drawers : 0;
unsigned books = config->has_books ? config->books : 0;
unsigned sockets = config->has_sockets ? config->sockets : 0;
unsigned dies = config->has_dies ? config->dies : 0;
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 12:22:09 +03:00
unsigned clusters = config->has_clusters ? config->clusters : 0;
unsigned modules = config->has_modules ? config->modules : 0;
unsigned cores = config->has_cores ? config->cores : 0;
unsigned threads = config->has_threads ? config->threads : 0;
unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
unsigned total_cpus;
/*
* Specified CPU topology parameters must be greater than zero,
* explicit configuration like "cpus=0" is not allowed.
*/
if ((config->has_cpus && config->cpus == 0) ||
(config->has_drawers && config->drawers == 0) ||
(config->has_books && config->books == 0) ||
(config->has_sockets && config->sockets == 0) ||
(config->has_dies && config->dies == 0) ||
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 12:22:09 +03:00
(config->has_clusters && config->clusters == 0) ||
(config->has_modules && config->modules == 0) ||
(config->has_cores && config->cores == 0) ||
(config->has_threads && config->threads == 0) ||
(config->has_maxcpus && config->maxcpus == 0)) {
error_setg(errp, "Invalid CPU topology: "
"CPU topology parameters must be greater than zero");
return;
}
/*
hw/core: allow parameter=1 for SMP topology on any machine This effectively reverts commit 54c4ea8f3ae614054079395842128a856a73dbf9 Author: Zhao Liu <zhao1.liu@intel.com> Date: Sat Mar 9 00:01:37 2024 +0800 hw/core/machine-smp: Deprecate unsupported "parameter=1" SMP configurations but is not done as a 'git revert' since the part of the changes to the file hw/core/machine-smp.c which add 'has_XXX' checks remain desirable. Furthermore, we have to tweak the subsequently added unit test to account for differing warning message. The rationale for the original deprecation was: "Currently, it was allowed for users to specify the unsupported topology parameter as "1". For example, x86 PC machine doesn't support drawer/book/cluster topology levels, but user could specify "-smp drawers=1,books=1,clusters=1". This is meaningless and confusing, so that the support for this kind of configurations is marked deprecated since 9.0." There are varying POVs on the topic of 'unsupported' topology levels. It is common to say that on a system without hyperthreading, that there is always 1 thread. Likewise when new CPUs introduced a concept of multiple "dies', it was reasonable to say that all historical CPUs before that implicitly had 1 'die'. Likewise for the more recently introduced 'modules' and 'clusters' parameter'. From this POV, it is valid to set 'parameter=1' on the -smp command line for any machine, only a value > 1 is strictly an error condition. It doesn't cause any functional difficulty for QEMU, because internally the QEMU code is itself assuming that all "unsupported" parameters implicitly have a value of '1'. At the libvirt level, we've allowed applications to set 'parameter=1' when configuring a guest, and pass that through to QEMU. Deprecating this creates extra difficulty for because there's no info exposed from QEMU about which machine types "support" which parameters. Thus, libvirt can't know whether it is valid to pass 'parameter=1' for a given machine type, or whether it will trigger deprecation messages. Since there's no apparent functional benefit to deleting this deprecated behaviour from QEMU, and it creates problems for consumers of QEMU, remove this deprecation. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Ján Tomko <jtomko@redhat.com> Message-ID: <20240513123358.612355-2-berrange@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-05-13 15:33:57 +03:00
* If not supported by the machine, a topology parameter must
* not be set to a value greater than 1.
*/
hw/core: allow parameter=1 for SMP topology on any machine This effectively reverts commit 54c4ea8f3ae614054079395842128a856a73dbf9 Author: Zhao Liu <zhao1.liu@intel.com> Date: Sat Mar 9 00:01:37 2024 +0800 hw/core/machine-smp: Deprecate unsupported "parameter=1" SMP configurations but is not done as a 'git revert' since the part of the changes to the file hw/core/machine-smp.c which add 'has_XXX' checks remain desirable. Furthermore, we have to tweak the subsequently added unit test to account for differing warning message. The rationale for the original deprecation was: "Currently, it was allowed for users to specify the unsupported topology parameter as "1". For example, x86 PC machine doesn't support drawer/book/cluster topology levels, but user could specify "-smp drawers=1,books=1,clusters=1". This is meaningless and confusing, so that the support for this kind of configurations is marked deprecated since 9.0." There are varying POVs on the topic of 'unsupported' topology levels. It is common to say that on a system without hyperthreading, that there is always 1 thread. Likewise when new CPUs introduced a concept of multiple "dies', it was reasonable to say that all historical CPUs before that implicitly had 1 'die'. Likewise for the more recently introduced 'modules' and 'clusters' parameter'. From this POV, it is valid to set 'parameter=1' on the -smp command line for any machine, only a value > 1 is strictly an error condition. It doesn't cause any functional difficulty for QEMU, because internally the QEMU code is itself assuming that all "unsupported" parameters implicitly have a value of '1'. At the libvirt level, we've allowed applications to set 'parameter=1' when configuring a guest, and pass that through to QEMU. Deprecating this creates extra difficulty for because there's no info exposed from QEMU about which machine types "support" which parameters. Thus, libvirt can't know whether it is valid to pass 'parameter=1' for a given machine type, or whether it will trigger deprecation messages. Since there's no apparent functional benefit to deleting this deprecated behaviour from QEMU, and it creates problems for consumers of QEMU, remove this deprecation. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Ján Tomko <jtomko@redhat.com> Message-ID: <20240513123358.612355-2-berrange@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-05-13 15:33:57 +03:00
if (!mc->smp_props.modules_supported &&
config->has_modules && config->modules > 1) {
error_setg(errp,
"modules > 1 not supported by this machine's CPU topology");
return;
}
modules = modules > 0 ? modules : 1;
hw/core: allow parameter=1 for SMP topology on any machine This effectively reverts commit 54c4ea8f3ae614054079395842128a856a73dbf9 Author: Zhao Liu <zhao1.liu@intel.com> Date: Sat Mar 9 00:01:37 2024 +0800 hw/core/machine-smp: Deprecate unsupported "parameter=1" SMP configurations but is not done as a 'git revert' since the part of the changes to the file hw/core/machine-smp.c which add 'has_XXX' checks remain desirable. Furthermore, we have to tweak the subsequently added unit test to account for differing warning message. The rationale for the original deprecation was: "Currently, it was allowed for users to specify the unsupported topology parameter as "1". For example, x86 PC machine doesn't support drawer/book/cluster topology levels, but user could specify "-smp drawers=1,books=1,clusters=1". This is meaningless and confusing, so that the support for this kind of configurations is marked deprecated since 9.0." There are varying POVs on the topic of 'unsupported' topology levels. It is common to say that on a system without hyperthreading, that there is always 1 thread. Likewise when new CPUs introduced a concept of multiple "dies', it was reasonable to say that all historical CPUs before that implicitly had 1 'die'. Likewise for the more recently introduced 'modules' and 'clusters' parameter'. From this POV, it is valid to set 'parameter=1' on the -smp command line for any machine, only a value > 1 is strictly an error condition. It doesn't cause any functional difficulty for QEMU, because internally the QEMU code is itself assuming that all "unsupported" parameters implicitly have a value of '1'. At the libvirt level, we've allowed applications to set 'parameter=1' when configuring a guest, and pass that through to QEMU. Deprecating this creates extra difficulty for because there's no info exposed from QEMU about which machine types "support" which parameters. Thus, libvirt can't know whether it is valid to pass 'parameter=1' for a given machine type, or whether it will trigger deprecation messages. Since there's no apparent functional benefit to deleting this deprecated behaviour from QEMU, and it creates problems for consumers of QEMU, remove this deprecation. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Ján Tomko <jtomko@redhat.com> Message-ID: <20240513123358.612355-2-berrange@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-05-13 15:33:57 +03:00
if (!mc->smp_props.clusters_supported &&
config->has_clusters && config->clusters > 1) {
error_setg(errp,
"clusters > 1 not supported by this machine's CPU topology");
return;
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 12:22:09 +03:00
}
clusters = clusters > 0 ? clusters : 1;
hw/core: allow parameter=1 for SMP topology on any machine This effectively reverts commit 54c4ea8f3ae614054079395842128a856a73dbf9 Author: Zhao Liu <zhao1.liu@intel.com> Date: Sat Mar 9 00:01:37 2024 +0800 hw/core/machine-smp: Deprecate unsupported "parameter=1" SMP configurations but is not done as a 'git revert' since the part of the changes to the file hw/core/machine-smp.c which add 'has_XXX' checks remain desirable. Furthermore, we have to tweak the subsequently added unit test to account for differing warning message. The rationale for the original deprecation was: "Currently, it was allowed for users to specify the unsupported topology parameter as "1". For example, x86 PC machine doesn't support drawer/book/cluster topology levels, but user could specify "-smp drawers=1,books=1,clusters=1". This is meaningless and confusing, so that the support for this kind of configurations is marked deprecated since 9.0." There are varying POVs on the topic of 'unsupported' topology levels. It is common to say that on a system without hyperthreading, that there is always 1 thread. Likewise when new CPUs introduced a concept of multiple "dies', it was reasonable to say that all historical CPUs before that implicitly had 1 'die'. Likewise for the more recently introduced 'modules' and 'clusters' parameter'. From this POV, it is valid to set 'parameter=1' on the -smp command line for any machine, only a value > 1 is strictly an error condition. It doesn't cause any functional difficulty for QEMU, because internally the QEMU code is itself assuming that all "unsupported" parameters implicitly have a value of '1'. At the libvirt level, we've allowed applications to set 'parameter=1' when configuring a guest, and pass that through to QEMU. Deprecating this creates extra difficulty for because there's no info exposed from QEMU about which machine types "support" which parameters. Thus, libvirt can't know whether it is valid to pass 'parameter=1' for a given machine type, or whether it will trigger deprecation messages. Since there's no apparent functional benefit to deleting this deprecated behaviour from QEMU, and it creates problems for consumers of QEMU, remove this deprecation. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Ján Tomko <jtomko@redhat.com> Message-ID: <20240513123358.612355-2-berrange@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-05-13 15:33:57 +03:00
if (!mc->smp_props.dies_supported &&
config->has_dies && config->dies > 1) {
error_setg(errp,
"dies > 1 not supported by this machine's CPU topology");
return;
}
dies = dies > 0 ? dies : 1;
hw/core: allow parameter=1 for SMP topology on any machine This effectively reverts commit 54c4ea8f3ae614054079395842128a856a73dbf9 Author: Zhao Liu <zhao1.liu@intel.com> Date: Sat Mar 9 00:01:37 2024 +0800 hw/core/machine-smp: Deprecate unsupported "parameter=1" SMP configurations but is not done as a 'git revert' since the part of the changes to the file hw/core/machine-smp.c which add 'has_XXX' checks remain desirable. Furthermore, we have to tweak the subsequently added unit test to account for differing warning message. The rationale for the original deprecation was: "Currently, it was allowed for users to specify the unsupported topology parameter as "1". For example, x86 PC machine doesn't support drawer/book/cluster topology levels, but user could specify "-smp drawers=1,books=1,clusters=1". This is meaningless and confusing, so that the support for this kind of configurations is marked deprecated since 9.0." There are varying POVs on the topic of 'unsupported' topology levels. It is common to say that on a system without hyperthreading, that there is always 1 thread. Likewise when new CPUs introduced a concept of multiple "dies', it was reasonable to say that all historical CPUs before that implicitly had 1 'die'. Likewise for the more recently introduced 'modules' and 'clusters' parameter'. From this POV, it is valid to set 'parameter=1' on the -smp command line for any machine, only a value > 1 is strictly an error condition. It doesn't cause any functional difficulty for QEMU, because internally the QEMU code is itself assuming that all "unsupported" parameters implicitly have a value of '1'. At the libvirt level, we've allowed applications to set 'parameter=1' when configuring a guest, and pass that through to QEMU. Deprecating this creates extra difficulty for because there's no info exposed from QEMU about which machine types "support" which parameters. Thus, libvirt can't know whether it is valid to pass 'parameter=1' for a given machine type, or whether it will trigger deprecation messages. Since there's no apparent functional benefit to deleting this deprecated behaviour from QEMU, and it creates problems for consumers of QEMU, remove this deprecation. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Ján Tomko <jtomko@redhat.com> Message-ID: <20240513123358.612355-2-berrange@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-05-13 15:33:57 +03:00
if (!mc->smp_props.books_supported &&
config->has_books && config->books > 1) {
error_setg(errp,
"books > 1 not supported by this machine's CPU topology");
return;
}
books = books > 0 ? books : 1;
hw/core: allow parameter=1 for SMP topology on any machine This effectively reverts commit 54c4ea8f3ae614054079395842128a856a73dbf9 Author: Zhao Liu <zhao1.liu@intel.com> Date: Sat Mar 9 00:01:37 2024 +0800 hw/core/machine-smp: Deprecate unsupported "parameter=1" SMP configurations but is not done as a 'git revert' since the part of the changes to the file hw/core/machine-smp.c which add 'has_XXX' checks remain desirable. Furthermore, we have to tweak the subsequently added unit test to account for differing warning message. The rationale for the original deprecation was: "Currently, it was allowed for users to specify the unsupported topology parameter as "1". For example, x86 PC machine doesn't support drawer/book/cluster topology levels, but user could specify "-smp drawers=1,books=1,clusters=1". This is meaningless and confusing, so that the support for this kind of configurations is marked deprecated since 9.0." There are varying POVs on the topic of 'unsupported' topology levels. It is common to say that on a system without hyperthreading, that there is always 1 thread. Likewise when new CPUs introduced a concept of multiple "dies', it was reasonable to say that all historical CPUs before that implicitly had 1 'die'. Likewise for the more recently introduced 'modules' and 'clusters' parameter'. From this POV, it is valid to set 'parameter=1' on the -smp command line for any machine, only a value > 1 is strictly an error condition. It doesn't cause any functional difficulty for QEMU, because internally the QEMU code is itself assuming that all "unsupported" parameters implicitly have a value of '1'. At the libvirt level, we've allowed applications to set 'parameter=1' when configuring a guest, and pass that through to QEMU. Deprecating this creates extra difficulty for because there's no info exposed from QEMU about which machine types "support" which parameters. Thus, libvirt can't know whether it is valid to pass 'parameter=1' for a given machine type, or whether it will trigger deprecation messages. Since there's no apparent functional benefit to deleting this deprecated behaviour from QEMU, and it creates problems for consumers of QEMU, remove this deprecation. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Reviewed-by: Ján Tomko <jtomko@redhat.com> Message-ID: <20240513123358.612355-2-berrange@redhat.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-05-13 15:33:57 +03:00
if (!mc->smp_props.drawers_supported &&
config->has_drawers && config->drawers > 1) {
error_setg(errp,
"drawers > 1 not supported by this machine's CPU topology");
return;
}
drawers = drawers > 0 ? drawers : 1;
/* compute missing values based on the provided ones */
if (cpus == 0 && maxcpus == 0) {
sockets = sockets > 0 ? sockets : 1;
cores = cores > 0 ? cores : 1;
threads = threads > 0 ? threads : 1;
} else {
maxcpus = maxcpus > 0 ? maxcpus : cpus;
if (mc->smp_props.prefer_sockets) {
/* prefer sockets over cores before 6.2 */
if (sockets == 0) {
cores = cores > 0 ? cores : 1;
threads = threads > 0 ? threads : 1;
sockets = maxcpus /
(drawers * books * dies * clusters *
modules * cores * threads);
} else if (cores == 0) {
threads = threads > 0 ? threads : 1;
cores = maxcpus /
(drawers * books * sockets * dies *
clusters * modules * threads);
}
} else {
/* prefer cores over sockets since 6.2 */
if (cores == 0) {
sockets = sockets > 0 ? sockets : 1;
threads = threads > 0 ? threads : 1;
cores = maxcpus /
(drawers * books * sockets * dies *
clusters * modules * threads);
} else if (sockets == 0) {
threads = threads > 0 ? threads : 1;
sockets = maxcpus /
(drawers * books * dies * clusters *
modules * cores * threads);
}
}
/* try to calculate omitted threads at last */
if (threads == 0) {
threads = maxcpus /
(drawers * books * sockets * dies *
clusters * modules * cores);
}
}
total_cpus = drawers * books * sockets * dies *
clusters * modules * cores * threads;
maxcpus = maxcpus > 0 ? maxcpus : total_cpus;
cpus = cpus > 0 ? cpus : maxcpus;
ms->smp.cpus = cpus;
ms->smp.drawers = drawers;
ms->smp.books = books;
ms->smp.sockets = sockets;
ms->smp.dies = dies;
hw/core/machine: Introduce CPU cluster topology support The new Cluster-Aware Scheduling support has landed in Linux 5.16, which has been proved to benefit the scheduling performance (e.g. load balance and wake_affine strategy) on both x86_64 and AArch64. So now in Linux 5.16 we have four-level arch-neutral CPU topology definition like below and a new scheduler level for clusters. struct cpu_topology { int thread_id; int core_id; int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; cpumask_t llc_sibling; } A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. In virtualization, on the Hosts which have pClusters (physical clusters), if we can design a vCPU topology with cluster level for guest kernel and have a dedicated vCPU pinning. A Cluster-Aware Guest kernel can also make use of the cache affinity of CPU clusters to gain similar scheduling performance. This patch adds infrastructure for CPU cluster level topology configuration and parsing, so that the user can specify cluster parameter if their machines support it. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> [PMD: Added '(since 7.0)' to @clusters in qapi/machine.json] Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-12-28 12:22:09 +03:00
ms->smp.clusters = clusters;
ms->smp.modules = modules;
ms->smp.cores = cores;
ms->smp.threads = threads;
ms->smp.max_cpus = maxcpus;
mc->smp_props.has_clusters = config->has_clusters;
/* sanity-check of the computed topology */
if (total_cpus != maxcpus) {
g_autofree char *topo_msg = cpu_hierarchy_to_string(ms);
error_setg(errp, "Invalid CPU topology: "
"product of the hierarchy must match maxcpus: "
"%s != maxcpus (%u)",
topo_msg, maxcpus);
return;
}
if (maxcpus < cpus) {
g_autofree char *topo_msg = cpu_hierarchy_to_string(ms);
error_setg(errp, "Invalid CPU topology: "
"maxcpus must be equal to or greater than smp: "
"%s == maxcpus (%u) < smp_cpus (%u)",
topo_msg, maxcpus, cpus);
return;
}
if (ms->smp.cpus < mc->min_cpus) {
error_setg(errp, "Invalid SMP CPUs %d. The min CPUs "
"supported by machine '%s' is %d",
ms->smp.cpus,
mc->name, mc->min_cpus);
return;
}
if (ms->smp.max_cpus > mc->max_cpus) {
error_setg(errp, "Invalid SMP CPUs %d. The max CPUs "
"supported by machine '%s' is %d",
ms->smp.max_cpus,
mc->name, mc->max_cpus);
return;
}
}
static bool machine_check_topo_support(MachineState *ms,
CpuTopologyLevel topo,
Error **errp)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
if ((topo == CPU_TOPOLOGY_LEVEL_MODULE && !mc->smp_props.modules_supported) ||
(topo == CPU_TOPOLOGY_LEVEL_CLUSTER && !mc->smp_props.clusters_supported) ||
(topo == CPU_TOPOLOGY_LEVEL_DIE && !mc->smp_props.dies_supported) ||
(topo == CPU_TOPOLOGY_LEVEL_BOOK && !mc->smp_props.books_supported) ||
(topo == CPU_TOPOLOGY_LEVEL_DRAWER && !mc->smp_props.drawers_supported)) {
error_setg(errp,
"Invalid topology level: %s. "
"The topology level is not supported by this machine",
CpuTopologyLevel_str(topo));
return false;
}
return true;
}
qapi/qom: Define cache enumeration and properties for machine The x86 and ARM need to allow user to configure cache properties (current only topology): * For x86, the default cache topology model (of max/host CPU) does not always match the Host's real physical cache topology. Performance can increase when the configured virtual topology is closer to the physical topology than a default topology would be. * For ARM, QEMU can't get the cache topology information from the CPU registers, then user configuration is necessary. Additionally, the cache information is also needed for MPAM emulation (for TCG) to build the right PPTT. Define smp-cache related enumeration and properties in QAPI, so that user could configure cache properties for SMP system through -machine in the subsequent patch. Cache enumeration (CacheLevelAndType) is implemented as the combination of cache level (level 1/2/3) and cache type (data/instruction/unified). Currently, separated L1 cache (L1 data cache and L1 instruction cache) with unified higher-level cache (e.g., unified L2 and L3 caches), is the most common cache architectures. Therefore, enumerate the L1 D-cache, L1 I-cache, L2 cache and L3 cache with smp-cache object to add the basic cache topology support. Other kinds of caches (e.g., L1 unified or L2/L3 separated caches) can be added directly into CacheLevelAndType if necessary. Cache properties (SmpCacheProperties) currently only contains cache topology information, and other cache properties can be added in it if necessary. Note, define cache topology based on CPU topology level with two reasons: 1. In practice, a cache will always be bound to the CPU container (either private in the CPU container or shared among multiple containers), and CPU container is often expressed in terms of CPU topology level. 2. The x86's cache-related CPUIDs encode cache topology based on APIC ID's CPU topology layout. And the ACPI PPTT table that ARM/RISCV relies on also requires CPU containers to help indicate the private shared hierarchy of the cache. Therefore, for SMP systems, it is natural to use the CPU topology hierarchy directly in QEMU to define the cache topology. With smp-cache QAPI support, add smp cache topology for machine by parsing the smp-cache object list. Also add the helper to access/update cache topology level of machine. Suggested-by: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> Tested-by: Yongwei Ma <yongwei.ma@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Message-ID: <20241101083331.340178-4-zhao1.liu@intel.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-11-01 11:33:25 +03:00
bool machine_parse_smp_cache(MachineState *ms,
const SmpCachePropertiesList *caches,
Error **errp)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
qapi/qom: Define cache enumeration and properties for machine The x86 and ARM need to allow user to configure cache properties (current only topology): * For x86, the default cache topology model (of max/host CPU) does not always match the Host's real physical cache topology. Performance can increase when the configured virtual topology is closer to the physical topology than a default topology would be. * For ARM, QEMU can't get the cache topology information from the CPU registers, then user configuration is necessary. Additionally, the cache information is also needed for MPAM emulation (for TCG) to build the right PPTT. Define smp-cache related enumeration and properties in QAPI, so that user could configure cache properties for SMP system through -machine in the subsequent patch. Cache enumeration (CacheLevelAndType) is implemented as the combination of cache level (level 1/2/3) and cache type (data/instruction/unified). Currently, separated L1 cache (L1 data cache and L1 instruction cache) with unified higher-level cache (e.g., unified L2 and L3 caches), is the most common cache architectures. Therefore, enumerate the L1 D-cache, L1 I-cache, L2 cache and L3 cache with smp-cache object to add the basic cache topology support. Other kinds of caches (e.g., L1 unified or L2/L3 separated caches) can be added directly into CacheLevelAndType if necessary. Cache properties (SmpCacheProperties) currently only contains cache topology information, and other cache properties can be added in it if necessary. Note, define cache topology based on CPU topology level with two reasons: 1. In practice, a cache will always be bound to the CPU container (either private in the CPU container or shared among multiple containers), and CPU container is often expressed in terms of CPU topology level. 2. The x86's cache-related CPUIDs encode cache topology based on APIC ID's CPU topology layout. And the ACPI PPTT table that ARM/RISCV relies on also requires CPU containers to help indicate the private shared hierarchy of the cache. Therefore, for SMP systems, it is natural to use the CPU topology hierarchy directly in QEMU to define the cache topology. With smp-cache QAPI support, add smp cache topology for machine by parsing the smp-cache object list. Also add the helper to access/update cache topology level of machine. Suggested-by: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> Tested-by: Yongwei Ma <yongwei.ma@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Message-ID: <20241101083331.340178-4-zhao1.liu@intel.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-11-01 11:33:25 +03:00
const SmpCachePropertiesList *node;
DECLARE_BITMAP(caches_bitmap, CACHE_LEVEL_AND_TYPE__MAX);
for (node = caches; node; node = node->next) {
/* Prohibit users from repeating settings. */
if (test_bit(node->value->cache, caches_bitmap)) {
error_setg(errp,
"Invalid cache properties: %s. "
"The cache properties are duplicated",
CacheLevelAndType_str(node->value->cache));
return false;
}
machine_set_cache_topo_level(ms, node->value->cache,
node->value->topology);
set_bit(node->value->cache, caches_bitmap);
}
for (int i = 0; i < CACHE_LEVEL_AND_TYPE__MAX; i++) {
const SmpCacheProperties *props = &ms->smp_cache.props[i];
/*
* Reject non "default" topology level if the cache isn't
* supported by the machine.
*/
if (props->topology != CPU_TOPOLOGY_LEVEL_DEFAULT &&
!mc->smp_props.cache_supported[props->cache]) {
error_setg(errp,
"%s cache topology not supported by this machine",
CacheLevelAndType_str(node->value->cache));
return false;
}
if (!machine_check_topo_support(ms, props->topology, errp)) {
return false;
}
}
qapi/qom: Define cache enumeration and properties for machine The x86 and ARM need to allow user to configure cache properties (current only topology): * For x86, the default cache topology model (of max/host CPU) does not always match the Host's real physical cache topology. Performance can increase when the configured virtual topology is closer to the physical topology than a default topology would be. * For ARM, QEMU can't get the cache topology information from the CPU registers, then user configuration is necessary. Additionally, the cache information is also needed for MPAM emulation (for TCG) to build the right PPTT. Define smp-cache related enumeration and properties in QAPI, so that user could configure cache properties for SMP system through -machine in the subsequent patch. Cache enumeration (CacheLevelAndType) is implemented as the combination of cache level (level 1/2/3) and cache type (data/instruction/unified). Currently, separated L1 cache (L1 data cache and L1 instruction cache) with unified higher-level cache (e.g., unified L2 and L3 caches), is the most common cache architectures. Therefore, enumerate the L1 D-cache, L1 I-cache, L2 cache and L3 cache with smp-cache object to add the basic cache topology support. Other kinds of caches (e.g., L1 unified or L2/L3 separated caches) can be added directly into CacheLevelAndType if necessary. Cache properties (SmpCacheProperties) currently only contains cache topology information, and other cache properties can be added in it if necessary. Note, define cache topology based on CPU topology level with two reasons: 1. In practice, a cache will always be bound to the CPU container (either private in the CPU container or shared among multiple containers), and CPU container is often expressed in terms of CPU topology level. 2. The x86's cache-related CPUIDs encode cache topology based on APIC ID's CPU topology layout. And the ACPI PPTT table that ARM/RISCV relies on also requires CPU containers to help indicate the private shared hierarchy of the cache. Therefore, for SMP systems, it is natural to use the CPU topology hierarchy directly in QEMU to define the cache topology. With smp-cache QAPI support, add smp cache topology for machine by parsing the smp-cache object list. Also add the helper to access/update cache topology level of machine. Suggested-by: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> Tested-by: Yongwei Ma <yongwei.ma@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Message-ID: <20241101083331.340178-4-zhao1.liu@intel.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-11-01 11:33:25 +03:00
return true;
}
unsigned int machine_topo_get_cores_per_socket(const MachineState *ms)
{
hw/core/machine: Introduce the module as a CPU topology level In x86, module is the topology level above core, which contains a set of cores that share certain resources (in current products, the resource usually includes L2 cache, as well as module scoped features and MSRs). Though smp.clusters could also share the L2 cache resource [1], there are following reasons that drive us to introduce the new smp.modules: * As the CPU topology abstraction in device tree [2], cluster supports nesting (though currently QEMU hasn't support that). In contrast, (x86) module does not support nesting. * Due to nesting, there is great flexibility in sharing resources on cluster, rather than narrowing cluster down to sharing L2 (and L3 tags) as the lowest topology level that contains cores. * Flexible nesting of cluster allows it to correspond to any level between the x86 package and core. * In Linux kernel, x86's cluster only represents the L2 cache domain but QEMU's smp.clusters is the CPU topology level. Linux kernel will also expose module level topology information in sysfs for x86. To avoid cluster ambiguity and keep a consistent CPU topology naming style with the Linux kernel, we introduce module level for x86. The module is, in existing hardware practice, the lowest layer that contains the core, while the cluster is able to have a higher topological scope than the module due to its nesting. Therefore, place the module between the cluster and the core: drawer/book/socket/die/cluster/module/core/thread With the above topological hierarchy order, introduce module level support in MachineState and MachineClass. [1]: https://lore.kernel.org/qemu-devel/c3d68005-54e0-b8fe-8dc1-5989fe3c7e69@huawei.com/ [2]: https://www.kernel.org/doc/Documentation/devicetree/bindings/cpu/cpu-topology.txt Suggested-by: Xiaoyao Li <xiaoyao.li@intel.com> Tested-by: Yongwei Ma <yongwei.ma@intel.com> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> Tested-by: Babu Moger <babu.moger@amd.com> Message-ID: <20240424154929.1487382-2-zhao1.liu@intel.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-04-24 18:49:09 +03:00
return ms->smp.cores * ms->smp.modules * ms->smp.clusters * ms->smp.dies;
}
unsigned int machine_topo_get_threads_per_socket(const MachineState *ms)
{
return ms->smp.threads * machine_topo_get_cores_per_socket(ms);
}
qapi/qom: Define cache enumeration and properties for machine The x86 and ARM need to allow user to configure cache properties (current only topology): * For x86, the default cache topology model (of max/host CPU) does not always match the Host's real physical cache topology. Performance can increase when the configured virtual topology is closer to the physical topology than a default topology would be. * For ARM, QEMU can't get the cache topology information from the CPU registers, then user configuration is necessary. Additionally, the cache information is also needed for MPAM emulation (for TCG) to build the right PPTT. Define smp-cache related enumeration and properties in QAPI, so that user could configure cache properties for SMP system through -machine in the subsequent patch. Cache enumeration (CacheLevelAndType) is implemented as the combination of cache level (level 1/2/3) and cache type (data/instruction/unified). Currently, separated L1 cache (L1 data cache and L1 instruction cache) with unified higher-level cache (e.g., unified L2 and L3 caches), is the most common cache architectures. Therefore, enumerate the L1 D-cache, L1 I-cache, L2 cache and L3 cache with smp-cache object to add the basic cache topology support. Other kinds of caches (e.g., L1 unified or L2/L3 separated caches) can be added directly into CacheLevelAndType if necessary. Cache properties (SmpCacheProperties) currently only contains cache topology information, and other cache properties can be added in it if necessary. Note, define cache topology based on CPU topology level with two reasons: 1. In practice, a cache will always be bound to the CPU container (either private in the CPU container or shared among multiple containers), and CPU container is often expressed in terms of CPU topology level. 2. The x86's cache-related CPUIDs encode cache topology based on APIC ID's CPU topology layout. And the ACPI PPTT table that ARM/RISCV relies on also requires CPU containers to help indicate the private shared hierarchy of the cache. Therefore, for SMP systems, it is natural to use the CPU topology hierarchy directly in QEMU to define the cache topology. With smp-cache QAPI support, add smp cache topology for machine by parsing the smp-cache object list. Also add the helper to access/update cache topology level of machine. Suggested-by: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> Tested-by: Yongwei Ma <yongwei.ma@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Message-ID: <20241101083331.340178-4-zhao1.liu@intel.com> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2024-11-01 11:33:25 +03:00
CpuTopologyLevel machine_get_cache_topo_level(const MachineState *ms,
CacheLevelAndType cache)
{
return ms->smp_cache.props[cache].topology;
}
void machine_set_cache_topo_level(MachineState *ms, CacheLevelAndType cache,
CpuTopologyLevel level)
{
ms->smp_cache.props[cache].topology = level;
}
/*
* When both cache1 and cache2 are configured with specific topology levels
* (not default level), is cache1's topology level higher than cache2?
*/
static bool smp_cache_topo_cmp(const SmpCache *smp_cache,
CacheLevelAndType cache1,
CacheLevelAndType cache2)
{
/*
* Before comparing, the "default" topology level should be replaced
* with the specific level.
*/
assert(smp_cache->props[cache1].topology != CPU_TOPOLOGY_LEVEL_DEFAULT);
return smp_cache->props[cache1].topology > smp_cache->props[cache2].topology;
}
/*
* Currently, we have no way to expose the arch-specific default cache model
* because the cache model is sometimes related to the CPU model (e.g., i386).
*
* We can only check the correctness of the cache topology after the arch loads
* the user-configured cache model from MachineState and consumes the special
* "default" level by replacing it with the specific level.
*/
bool machine_check_smp_cache(const MachineState *ms, Error **errp)
{
if (smp_cache_topo_cmp(&ms->smp_cache, CACHE_LEVEL_AND_TYPE_L1D,
CACHE_LEVEL_AND_TYPE_L2) ||
smp_cache_topo_cmp(&ms->smp_cache, CACHE_LEVEL_AND_TYPE_L1I,
CACHE_LEVEL_AND_TYPE_L2)) {
error_setg(errp,
"Invalid smp cache topology. "
"L2 cache topology level shouldn't be lower than L1 cache");
return false;
}
if (smp_cache_topo_cmp(&ms->smp_cache, CACHE_LEVEL_AND_TYPE_L2,
CACHE_LEVEL_AND_TYPE_L3)) {
error_setg(errp,
"Invalid smp cache topology. "
"L3 cache topology level shouldn't be lower than L2 cache");
return false;
}
return true;
}