numa: move source of default CPUs to NUMA node mapping into boards
Originally CPU threads were by default assigned in round-robin fashion. However, this was causing issues in the guest, since CPU threads from the same socket/core could be placed on different NUMA nodes. Commit fb43b73b
(pc: fix default VCPU to NUMA node mapping) fixed it by grouping threads within a socket on the same node, introducing the cpu_index_to_socket_id() callback, and commit 20bb648d
(spapr: Fix default NUMA node allocation for threads) reused that callback to fix similar issues for the SPAPR machine, even though "socket" doesn't make much sense there. As a result, QEMU ended up having 3 default distribution rules used by 3 targets (virt-arm, spapr, pc). In an effort to move NUMA mapping for CPUs into possible_cpus, generalize the default mapping in numa.c by making boards decide on the default mapping and letting them explicitly tell the generic NUMA code which node a CPU thread belongs to, replacing cpu_index_to_socket_id() with @cpu_index_to_instance_props(), which provides the default node_id assigned by the board to the specified cpu_index. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <1494415802-227633-2-git-send-email-imammedo@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
This commit is contained in:
parent
d9c34f9c6c
commit
ea089eebbd
@ -1539,6 +1539,16 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static CpuInstanceProperties
|
||||||
|
virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
|
||||||
|
{
|
||||||
|
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||||
|
const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
|
||||||
|
|
||||||
|
assert(cpu_index < possible_cpus->len);
|
||||||
|
return possible_cpus->cpus[cpu_index].props;
|
||||||
|
}
|
||||||
|
|
||||||
static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
|
static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
@ -1558,8 +1568,13 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
|
|||||||
ms->possible_cpus->cpus[n].props.has_thread_id = true;
|
ms->possible_cpus->cpus[n].props.has_thread_id = true;
|
||||||
ms->possible_cpus->cpus[n].props.thread_id = n;
|
ms->possible_cpus->cpus[n].props.thread_id = n;
|
||||||
|
|
||||||
/* TODO: add 'has_node/node' here to describe
|
/* default distribution of CPUs over NUMA nodes */
|
||||||
to which node core belongs */
|
if (nb_numa_nodes) {
|
||||||
|
/* preset values but do not enable them i.e. 'has_node_id = false',
|
||||||
|
* numa init code will enable them later if manual mapping wasn't
|
||||||
|
* present on CLI */
|
||||||
|
ms->possible_cpus->cpus[n].props.node_id = n % nb_numa_nodes;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ms->possible_cpus;
|
return ms->possible_cpus;
|
||||||
}
|
}
|
||||||
@ -1581,6 +1596,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
|
|||||||
/* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
|
/* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
|
||||||
mc->minimum_page_bits = 12;
|
mc->minimum_page_bits = 12;
|
||||||
mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
|
mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
|
||||||
|
mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const TypeInfo virt_machine_info = {
|
static const TypeInfo virt_machine_info = {
|
||||||
|
23
hw/i386/pc.c
23
hw/i386/pc.c
@ -2243,12 +2243,14 @@ static void pc_machine_reset(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
|
static CpuInstanceProperties
|
||||||
|
pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
|
||||||
{
|
{
|
||||||
X86CPUTopoInfo topo;
|
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||||
x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index,
|
const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
|
||||||
&topo);
|
|
||||||
return topo.pkg_id;
|
assert(cpu_index < possible_cpus->len);
|
||||||
|
return possible_cpus->cpus[cpu_index].props;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
|
static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
|
||||||
@ -2280,6 +2282,15 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
|
|||||||
ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
|
ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
|
||||||
ms->possible_cpus->cpus[i].props.has_thread_id = true;
|
ms->possible_cpus->cpus[i].props.has_thread_id = true;
|
||||||
ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id;
|
ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id;
|
||||||
|
|
||||||
|
/* default distribution of CPUs over NUMA nodes */
|
||||||
|
if (nb_numa_nodes) {
|
||||||
|
/* preset values but do not enable them i.e. 'has_node_id = false',
|
||||||
|
* numa init code will enable them later if manual mapping wasn't
|
||||||
|
* present on CLI */
|
||||||
|
ms->possible_cpus->cpus[i].props.node_id =
|
||||||
|
topo.pkg_id % nb_numa_nodes;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ms->possible_cpus;
|
return ms->possible_cpus;
|
||||||
}
|
}
|
||||||
@ -2322,7 +2333,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
|
|||||||
pcmc->acpi_data_size = 0x20000 + 0x8000;
|
pcmc->acpi_data_size = 0x20000 + 0x8000;
|
||||||
pcmc->save_tsc_khz = true;
|
pcmc->save_tsc_khz = true;
|
||||||
mc->get_hotplug_handler = pc_get_hotpug_handler;
|
mc->get_hotplug_handler = pc_get_hotpug_handler;
|
||||||
mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
|
mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
|
||||||
mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
|
mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
|
||||||
mc->has_hotpluggable_cpus = true;
|
mc->has_hotpluggable_cpus = true;
|
||||||
mc->default_boot_order = "cad";
|
mc->default_boot_order = "cad";
|
||||||
|
@ -2981,11 +2981,18 @@ static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
|
static CpuInstanceProperties
|
||||||
|
spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
|
||||||
{
|
{
|
||||||
/* Allocate to NUMA nodes on a "socket" basis (not that concept of
|
CPUArchId *core_slot;
|
||||||
* socket means much for the paravirtualized PAPR platform) */
|
MachineClass *mc = MACHINE_GET_CLASS(machine);
|
||||||
return cpu_index / smp_threads / smp_cores;
|
|
||||||
|
/* make sure possible_cpu are intialized */
|
||||||
|
mc->possible_cpu_arch_ids(machine);
|
||||||
|
/* get CPU core slot containing thread that matches cpu_index */
|
||||||
|
core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
|
||||||
|
assert(core_slot);
|
||||||
|
return core_slot->props;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
|
static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
|
||||||
@ -3012,8 +3019,15 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
|
|||||||
machine->possible_cpus->cpus[i].arch_id = core_id;
|
machine->possible_cpus->cpus[i].arch_id = core_id;
|
||||||
machine->possible_cpus->cpus[i].props.has_core_id = true;
|
machine->possible_cpus->cpus[i].props.has_core_id = true;
|
||||||
machine->possible_cpus->cpus[i].props.core_id = core_id;
|
machine->possible_cpus->cpus[i].props.core_id = core_id;
|
||||||
/* TODO: add 'has_node/node' here to describe
|
|
||||||
to which node core belongs */
|
/* default distribution of CPUs over NUMA nodes */
|
||||||
|
if (nb_numa_nodes) {
|
||||||
|
/* preset values but do not enable them i.e. 'has_node_id = false',
|
||||||
|
* numa init code will enable them later if manual mapping wasn't
|
||||||
|
* present on CLI */
|
||||||
|
machine->possible_cpus->cpus[i].props.node_id =
|
||||||
|
core_id / smp_threads / smp_cores % nb_numa_nodes;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return machine->possible_cpus;
|
return machine->possible_cpus;
|
||||||
}
|
}
|
||||||
@ -3138,7 +3152,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
|
|||||||
hc->pre_plug = spapr_machine_device_pre_plug;
|
hc->pre_plug = spapr_machine_device_pre_plug;
|
||||||
hc->plug = spapr_machine_device_plug;
|
hc->plug = spapr_machine_device_plug;
|
||||||
hc->unplug = spapr_machine_device_unplug;
|
hc->unplug = spapr_machine_device_unplug;
|
||||||
mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
|
mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
|
||||||
mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
|
mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
|
||||||
hc->unplug_request = spapr_machine_device_unplug_request;
|
hc->unplug_request = spapr_machine_device_unplug_request;
|
||||||
|
|
||||||
|
@ -74,7 +74,10 @@ typedef struct {
|
|||||||
* of HotplugHandler object, which handles hotplug operation
|
* of HotplugHandler object, which handles hotplug operation
|
||||||
* for a given @dev. It may return NULL if @dev doesn't require
|
* for a given @dev. It may return NULL if @dev doesn't require
|
||||||
* any actions to be performed by hotplug handler.
|
* any actions to be performed by hotplug handler.
|
||||||
* @cpu_index_to_socket_id:
|
* @cpu_index_to_instance_props:
|
||||||
|
* used to provide @cpu_index to socket/core/thread number mapping, allowing
|
||||||
|
* legacy code to perform maping from cpu_index to topology properties
|
||||||
|
* Returns: tuple of socket/core/thread ids given cpu_index belongs to.
|
||||||
* used to provide @cpu_index to socket number mapping, allowing
|
* used to provide @cpu_index to socket number mapping, allowing
|
||||||
* a machine to group CPU threads belonging to the same socket/package
|
* a machine to group CPU threads belonging to the same socket/package
|
||||||
* Returns: socket number given cpu_index belongs to.
|
* Returns: socket number given cpu_index belongs to.
|
||||||
@ -141,7 +144,8 @@ struct MachineClass {
|
|||||||
|
|
||||||
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
|
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
|
||||||
DeviceState *dev);
|
DeviceState *dev);
|
||||||
unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
|
CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
|
||||||
|
unsigned cpu_index);
|
||||||
const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
|
const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ struct node_info {
|
|||||||
};
|
};
|
||||||
|
|
||||||
extern NodeInfo numa_info[MAX_NODES];
|
extern NodeInfo numa_info[MAX_NODES];
|
||||||
void parse_numa_opts(MachineClass *mc);
|
void parse_numa_opts(MachineState *ms);
|
||||||
void numa_post_machine_init(void);
|
void numa_post_machine_init(void);
|
||||||
void query_numa_node_mem(uint64_t node_mem[]);
|
void query_numa_node_mem(uint64_t node_mem[]);
|
||||||
extern QemuOptsList qemu_numa_opts;
|
extern QemuOptsList qemu_numa_opts;
|
||||||
|
24
numa.c
24
numa.c
@ -443,9 +443,10 @@ void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
|
|||||||
nodes[i].node_mem = size - usedmem;
|
nodes[i].node_mem = size - usedmem;
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_numa_opts(MachineClass *mc)
|
void parse_numa_opts(MachineState *ms)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
MachineClass *mc = MACHINE_GET_CLASS(ms);
|
||||||
|
|
||||||
for (i = 0; i < MAX_NODES; i++) {
|
for (i = 0; i < MAX_NODES; i++) {
|
||||||
numa_info[i].node_cpu = bitmap_new(max_cpus);
|
numa_info[i].node_cpu = bitmap_new(max_cpus);
|
||||||
@ -511,21 +512,18 @@ void parse_numa_opts(MachineClass *mc)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Historically VCPUs were assigned in round-robin order to NUMA
|
|
||||||
* nodes. However it causes issues with guest not handling it nice
|
/* assign CPUs to nodes using board provided default mapping */
|
||||||
* in case where cores/threads from a multicore CPU appear on
|
if (!mc->cpu_index_to_instance_props) {
|
||||||
* different nodes. So allow boards to override default distribution
|
error_report("default CPUs to NUMA node mapping isn't supported");
|
||||||
* rule grouping VCPUs by socket so that VCPUs from the same socket
|
exit(1);
|
||||||
* would be on the same node.
|
}
|
||||||
*/
|
|
||||||
if (i == nb_numa_nodes) {
|
if (i == nb_numa_nodes) {
|
||||||
for (i = 0; i < max_cpus; i++) {
|
for (i = 0; i < max_cpus; i++) {
|
||||||
unsigned node_id = i % nb_numa_nodes;
|
CpuInstanceProperties props;
|
||||||
if (mc->cpu_index_to_socket_id) {
|
props = mc->cpu_index_to_instance_props(ms, i);
|
||||||
node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes;
|
|
||||||
}
|
|
||||||
|
|
||||||
set_bit(i, numa_info[node_id].node_cpu);
|
set_bit(i, numa_info[props.node_id].node_cpu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
2
vl.c
2
vl.c
@ -4503,7 +4503,7 @@ int main(int argc, char **argv, char **envp)
|
|||||||
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
|
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
|
||||||
default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
|
default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
|
||||||
|
|
||||||
parse_numa_opts(machine_class);
|
parse_numa_opts(current_machine);
|
||||||
|
|
||||||
if (qemu_opts_foreach(qemu_find_opts("mon"),
|
if (qemu_opts_foreach(qemu_find_opts("mon"),
|
||||||
mon_init_func, NULL, NULL)) {
|
mon_init_func, NULL, NULL)) {
|
||||||
|
Loading…
Reference in New Issue
Block a user