x86 and machine queue, 2017-05-11

Highlights:
 * New "-numa cpu" option
 * NUMA distance configuration
 * migration/i386 vmstatification
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABCAAGBQJZFLh3AAoJECgHk2+YTcWmppQQAJe9Y5a3VwHXqvHbwBHX2ysn
 RZDAUPd9DWpbM+UUydyKOVIZ7u5RXbbVq4E0NeCD8VYYd+grZB5Wo1cAzy3b4U2j
 2s+MDqaPMtZtGoqxTsyQOVoVxazT5Kf1zglK+iUEzik44J7LGdro+ty2Z7Ut2c11
 q9rE/GNS78czBm7c4lxgkxXW4N95K/tEGlLtDQ7uct//3U/ZimF+mO6GcbVFlOWT
 4iEbOz2sqvBVv22nLJRufiPgFNIW4hizAz5KBWxwGFCCKvT3N6yYNKKjzEpCw+jE
 lpjIRODU02yIZZZY841fLRtyrk7p4zORS8jRaHTdEJgb5bGc/YazxxVL8nzRQT1W
 VxFwAMd+UNrDkV24hpN++Ln2O+b3kwcGZ7uA/qu9d5WvSYUKXlHqcMJ35q6zuhAI
 /ecfYO7EZfVP86VjIt5IH04iV8RChA9Q6de+kQEFa6wHUxufeCOwCFqukGo8zj07
 plX8NcjnzYmSXKnYjHOHao4rKT+DiJhRB60rFiMeKP/qvKbZPjtgsIeonhHm53qZ
 /QwkhowahHKkpAnetIl0QHm8KS4YudAofMi/Fl+he4gRkEbSQVAo6iQb2L4cjcLC
 LNSDDsIVWGem4gCR+vcsFqB3lggRDfltHXm15JKh92UMpOr6RI6s8pD55T7EdnPC
 CfdxWB5kYM6/lLbOHj94
 =48wH
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'ehabkost/tags/x86-and-machine-pull-request' into staging

x86 and machine queue, 2017-05-11

Highlights:
* New "-numa cpu" option
* NUMA distance configuration
* migration/i386 vmstatification

# gpg: Signature made Thu 11 May 2017 08:16:07 PM BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# gpg: Note: This key has expired!
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* ehabkost/tags/x86-and-machine-pull-request: (29 commits)
  migration/i386: Remove support for pre-0.12 formats
  vmstatification: i386 FPReg
  migration/i386: Remove old non-softfloat 64bit FP support
  tests: check -numa node,cpu=props_list usecase
  numa: add '-numa cpu,...' option for property based node mapping
  numa: remove node_cpu bitmaps as they are no longer used
  numa: use possible_cpus for not mapped CPUs check
  machine: call machine init from wrapper
  numa: remove no longer need numa_post_machine_init()
  tests: numa: add case for QMP command query-cpus
  QMP: include CpuInstanceProperties into query_cpus output output
  virt-arm: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu()
  spapr: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu()
  pc: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu()
  numa: do default mapping based on possible_cpus instead of node_cpu bitmaps
  numa: mirror cpu to node mapping in MachineState::possible_cpus
  numa: add check that board supports cpu_index to node mapping
  virt-arm: add node-id property to CPU
  pc: add node-id property to CPU
  spapr: add node-id property to sPAPR core
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2017-05-15 14:11:55 +01:00
commit ba9915e1f8
36 changed files with 1153 additions and 426 deletions

10
cpus.c
View File

@ -50,6 +50,7 @@
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "hw/boards.h"
#ifdef CONFIG_LINUX
@ -1865,6 +1866,8 @@ void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
CpuInfoList *qmp_query_cpus(Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
MachineClass *mc = MACHINE_GET_CLASS(ms);
CpuInfoList *head = NULL, *cur_item = NULL;
CPUState *cpu;
@ -1915,6 +1918,13 @@ CpuInfoList *qmp_query_cpus(Error **errp)
#else
info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
info->value->has_props = !!mc->cpu_index_to_instance_props;
if (info->value->has_props) {
CpuInstanceProperties *props;
props = g_malloc0(sizeof(*props));
*props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
info->value->props = props;
}
/* XXX: waiting for the qapi to support GSList */
if (!cur_item) {

View File

@ -24,6 +24,7 @@
#include "hw/acpi/aml-build.h"
#include "qemu/bswap.h"
#include "qemu/bitops.h"
#include "sysemu/numa.h"
static GArray *build_alloc_array(void)
{
@ -1609,3 +1610,28 @@ void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
numamem->base_addr = cpu_to_le64(base);
numamem->range_length = cpu_to_le64(len);
}
/*
* ACPI spec 5.2.17 System Locality Distance Information Table
* (Revision 2.0 or later)
*/
void build_slit(GArray *table_data, BIOSLinker *linker)
{
int slit_start, i, j;
slit_start = table_data->len;
acpi_data_push(table_data, sizeof(AcpiTableHeader));
build_append_int_noprefix(table_data, nb_numa_nodes, 8);
for (i = 0; i < nb_numa_nodes; i++) {
for (j = 0; j < nb_numa_nodes; j++) {
assert(numa_info[i].distance[j]);
build_append_int_noprefix(table_data, numa_info[i].distance[j], 1);
}
}
build_header(linker, table_data,
(void *)(table_data->data + slit_start),
"SLIT",
table_data->len - slit_start, 1, NULL, NULL);
}

View File

@ -503,7 +503,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
/* build Processor object for each processor */
for (i = 0; i < arch_ids->len; i++) {
int j;
Aml *dev;
Aml *uid = aml_int(i);
GArray *madt_buf = g_array_new(0, 1, 1);
@ -557,9 +556,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
* as a result _PXM is required for all CPUs which might
* be hot-plugged. For simplicity, add it for all CPUs.
*/
j = numa_get_node_for_cpu(i);
if (j < nb_numa_nodes) {
aml_append(dev, aml_name_decl("_PXM", aml_int(j)));
if (arch_ids->cpus[i].props.has_node_id) {
aml_append(dev, aml_name_decl("_PXM",
aml_int(arch_ids->cpus[i].props.node_id)));
}
aml_append(cpus_dev, dev);

View File

@ -486,30 +486,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
AcpiSystemResourceAffinityTable *srat;
AcpiSratProcessorGiccAffinity *core;
AcpiSratMemoryAffinity *numamem;
int i, j, srat_start;
int i, srat_start;
uint64_t mem_base;
uint32_t *cpu_node = g_malloc0(vms->smp_cpus * sizeof(uint32_t));
for (i = 0; i < vms->smp_cpus; i++) {
j = numa_get_node_for_cpu(i);
if (j < nb_numa_nodes) {
cpu_node[i] = j;
}
}
MachineClass *mc = MACHINE_GET_CLASS(vms);
const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(MACHINE(vms));
srat_start = table_data->len;
srat = acpi_data_push(table_data, sizeof(*srat));
srat->reserved1 = cpu_to_le32(1);
for (i = 0; i < vms->smp_cpus; ++i) {
for (i = 0; i < cpu_list->len; ++i) {
int node_id = cpu_list->cpus[i].props.has_node_id ?
cpu_list->cpus[i].props.node_id : 0;
core = acpi_data_push(table_data, sizeof(*core));
core->type = ACPI_SRAT_PROCESSOR_GICC;
core->length = sizeof(*core);
core->proximity = cpu_to_le32(cpu_node[i]);
core->proximity = cpu_to_le32(node_id);
core->acpi_processor_uid = cpu_to_le32(i);
core->flags = cpu_to_le32(1);
}
g_free(cpu_node);
mem_base = vms->memmap[VIRT_MEM].base;
for (i = 0; i < nb_numa_nodes; ++i) {

View File

@ -338,7 +338,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
{
int cpu;
int addr_cells = 1;
unsigned int i;
const MachineState *ms = MACHINE(vms);
/*
* From Documentation/devicetree/bindings/arm/cpus.txt
@ -369,6 +369,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) {
char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
CPUState *cs = CPU(armcpu);
qemu_fdt_add_subnode(vms->fdt, nodename);
qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu");
@ -389,9 +390,9 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
armcpu->mp_affinity);
}
i = numa_get_node_for_cpu(cpu);
if (i < nb_numa_nodes) {
qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", i);
if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id",
ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
}
g_free(nodename);
@ -1194,10 +1195,35 @@ void virt_machine_done(Notifier *notifier, void *data)
virt_build_smbios(vms);
}
static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
{
uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER;
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
if (!vmc->disallow_affinity_adjustment) {
/* Adjust MPIDR like 64-bit KVM hosts, which incorporate the
* GIC's target-list limitations. 32-bit KVM hosts currently
* always create clusters of 4 CPUs, but that is expected to
* change when they gain support for gicv3. When KVM is enabled
* it will override the changes we make here, therefore our
* purposes are to make TCG consistent (with 64-bit KVM hosts)
* and to improve SGI efficiency.
*/
if (vms->gic_version == 3) {
clustersz = GICV3_TARGETLIST_BITS;
} else {
clustersz = GIC_TARGETLIST_BITS;
}
}
return arm_cpu_mp_affinity(idx, clustersz);
}
static void machvirt_init(MachineState *machine)
{
VirtMachineState *vms = VIRT_MACHINE(machine);
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
MachineClass *mc = MACHINE_GET_CLASS(machine);
const CPUArchIdList *possible_cpus;
qemu_irq pic[NUM_IRQS];
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *secure_sysmem = NULL;
@ -1210,7 +1236,6 @@ static void machvirt_init(MachineState *machine)
CPUClass *cc;
Error *err = NULL;
bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
uint8_t clustersz;
if (!cpu_model) {
cpu_model = "cortex-a15";
@ -1263,10 +1288,8 @@ static void machvirt_init(MachineState *machine)
*/
if (vms->gic_version == 3) {
virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000;
clustersz = GICV3_TARGETLIST_BITS;
} else {
virt_max_cpus = GIC_NCPU;
clustersz = GIC_TARGETLIST_BITS;
}
if (max_cpus > virt_max_cpus) {
@ -1324,21 +1347,35 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
for (n = 0; n < smp_cpus; n++) {
Object *cpuobj = object_new(typename);
if (!vmc->disallow_affinity_adjustment) {
/* Adjust MPIDR like 64-bit KVM hosts, which incorporate the
* GIC's target-list limitations. 32-bit KVM hosts currently
* always create clusters of 4 CPUs, but that is expected to
* change when they gain support for gicv3. When KVM is enabled
* it will override the changes we make here, therefore our
* purposes are to make TCG consistent (with 64-bit KVM hosts)
* and to improve SGI efficiency.
*/
uint8_t aff1 = n / clustersz;
uint8_t aff0 = n % clustersz;
object_property_set_int(cpuobj, (aff1 << ARM_AFF1_SHIFT) | aff0,
"mp-affinity", NULL);
possible_cpus = mc->possible_cpu_arch_ids(machine);
for (n = 0; n < possible_cpus->len; n++) {
Object *cpuobj;
CPUState *cs;
int node_id;
if (n >= smp_cpus) {
break;
}
cpuobj = object_new(typename);
object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id,
"mp-affinity", NULL);
cs = CPU(cpuobj);
cs->cpu_index = n;
node_id = possible_cpus->cpus[cs->cpu_index].props.node_id;
if (!possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
/* by default CPUState::numa_node was 0 if it's not set via CLI
* keep it this way for now but in future we probably should
* refuse to start up with incomplete numa mapping */
node_id = 0;
}
if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) {
cs->numa_node = node_id;
} else {
/* CPU isn't device_add compatible yet, this shouldn't happen */
error_setg(&error_abort, "user set node-id not implemented");
}
if (!vms->secure) {
@ -1518,6 +1555,46 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp)
}
}
static CpuInstanceProperties
virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
assert(cpu_index < possible_cpus->len);
return possible_cpus->cpus[cpu_index].props;
}
static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
{
int n;
VirtMachineState *vms = VIRT_MACHINE(ms);
if (ms->possible_cpus) {
assert(ms->possible_cpus->len == max_cpus);
return ms->possible_cpus;
}
ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
sizeof(CPUArchId) * max_cpus);
ms->possible_cpus->len = max_cpus;
for (n = 0; n < ms->possible_cpus->len; n++) {
ms->possible_cpus->cpus[n].arch_id =
virt_cpu_mp_affinity(vms, n);
ms->possible_cpus->cpus[n].props.has_thread_id = true;
ms->possible_cpus->cpus[n].props.thread_id = n;
/* default distribution of CPUs over NUMA nodes */
if (nb_numa_nodes) {
/* preset values but do not enable them i.e. 'has_node_id = false',
* numa init code will enable them later if manual mapping wasn't
* present on CLI */
ms->possible_cpus->cpus[n].props.node_id = n % nb_numa_nodes;
}
}
return ms->possible_cpus;
}
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@ -1534,6 +1611,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
mc->pci_allow_0_address = true;
/* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
mc->minimum_page_bits = 12;
mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
}
static const TypeInfo virt_machine_info = {

View File

@ -17,8 +17,10 @@
#include "qapi/visitor.h"
#include "hw/sysbus.h"
#include "sysemu/sysemu.h"
#include "sysemu/numa.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "sysemu/numa.h"
static char *machine_get_accel(Object *obj, Error **errp)
{
@ -388,6 +390,102 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine)
return head;
}
/**
* machine_set_cpu_numa_node:
* @machine: machine object to modify
* @props: specifies which cpu objects to assign to
* numa node specified by @props.node_id
* @errp: if an error occurs, a pointer to an area to store the error
*
* Associate NUMA node specified by @props.node_id with cpu slots that
* match socket/core/thread-ids specified by @props. It's recommended to use
* query-hotpluggable-cpus.props values to specify affected cpu slots,
* which would lead to exact 1:1 mapping of cpu slots to NUMA node.
*
* However for CLI convenience it's possible to pass in subset of properties,
* which would affect all cpu slots that match it.
* Ex for pc machine:
* -smp 4,cores=2,sockets=2 -numa node,nodeid=0 -numa node,nodeid=1 \
* -numa cpu,node-id=0,socket_id=0 \
* -numa cpu,node-id=1,socket_id=1
* will assign all child cores of socket 0 to node 0 and
* of socket 1 to node 1.
*
* On attempt of reassigning (already assigned) cpu slot to another NUMA node,
* return error.
* Empty subset is disallowed and function will return with error in this case.
*/
void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props, Error **errp)
{
MachineClass *mc = MACHINE_GET_CLASS(machine);
bool match = false;
int i;
if (!mc->possible_cpu_arch_ids) {
error_setg(errp, "mapping of CPUs to NUMA node is not supported");
return;
}
/* disabling node mapping is not supported, forbid it */
assert(props->has_node_id);
/* force board to initialize possible_cpus if it hasn't been done yet */
mc->possible_cpu_arch_ids(machine);
for (i = 0; i < machine->possible_cpus->len; i++) {
CPUArchId *slot = &machine->possible_cpus->cpus[i];
/* reject unsupported by board properties */
if (props->has_thread_id && !slot->props.has_thread_id) {
error_setg(errp, "thread-id is not supported");
return;
}
if (props->has_core_id && !slot->props.has_core_id) {
error_setg(errp, "core-id is not supported");
return;
}
if (props->has_socket_id && !slot->props.has_socket_id) {
error_setg(errp, "socket-id is not supported");
return;
}
/* skip slots with explicit mismatch */
if (props->has_thread_id && props->thread_id != slot->props.thread_id) {
continue;
}
if (props->has_core_id && props->core_id != slot->props.core_id) {
continue;
}
if (props->has_socket_id && props->socket_id != slot->props.socket_id) {
continue;
}
/* reject assignment if slot is already assigned, for compatibility
* of legacy cpu_index mapping with SPAPR core based mapping do not
* error out if cpu thread and matched core have the same node-id */
if (slot->props.has_node_id &&
slot->props.node_id != props->node_id) {
error_setg(errp, "CPU is already assigned to node-id: %" PRId64,
slot->props.node_id);
return;
}
/* assign slot to node as it's matched '-numa cpu' key */
match = true;
slot->props.node_id = props->node_id;
slot->props.has_node_id = props->has_node_id;
}
if (!match) {
error_setg(errp, "no match found");
}
}
static void machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@ -400,6 +498,7 @@ static void machine_class_init(ObjectClass *oc, void *data)
* On Linux, each node's border has to be 8MB aligned
*/
mc->numa_mem_align_shift = 23;
mc->numa_auto_assign_ram = numa_default_auto_assign_ram;
object_class_property_add_str(oc, "accel",
machine_get_accel, machine_set_accel, &error_abort);
@ -580,6 +679,69 @@ bool machine_mem_merge(MachineState *machine)
return machine->mem_merge;
}
static char *cpu_slot_to_string(const CPUArchId *cpu)
{
GString *s = g_string_new(NULL);
if (cpu->props.has_socket_id) {
g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id);
}
if (cpu->props.has_core_id) {
if (s->len) {
g_string_append_printf(s, ", ");
}
g_string_append_printf(s, "core-id: %"PRId64, cpu->props.core_id);
}
if (cpu->props.has_thread_id) {
if (s->len) {
g_string_append_printf(s, ", ");
}
g_string_append_printf(s, "thread-id: %"PRId64, cpu->props.thread_id);
}
return g_string_free(s, false);
}
static void machine_numa_validate(MachineState *machine)
{
int i;
GString *s = g_string_new(NULL);
MachineClass *mc = MACHINE_GET_CLASS(machine);
const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(machine);
assert(nb_numa_nodes);
for (i = 0; i < possible_cpus->len; i++) {
const CPUArchId *cpu_slot = &possible_cpus->cpus[i];
/* at this point numa mappings are initilized by CLI options
* or with default mappings so it's sufficient to list
* all not yet mapped CPUs here */
/* TODO: make it hard error in future */
if (!cpu_slot->props.has_node_id) {
char *cpu_str = cpu_slot_to_string(cpu_slot);
g_string_append_printf(s, "%sCPU %d [%s]", s->len ? ", " : "", i,
cpu_str);
g_free(cpu_str);
}
}
if (s->len) {
error_report("warning: CPU(s) not present in any NUMA nodes: %s",
s->str);
error_report("warning: All CPU(s) up to maxcpus should be described "
"in NUMA config, ability to start up with partial NUMA "
"mappings is obsoleted and will be removed in future");
}
g_string_free(s, true);
}
void machine_run_board_init(MachineState *machine)
{
MachineClass *machine_class = MACHINE_GET_CLASS(machine);
if (nb_numa_nodes) {
machine_numa_validate(machine);
}
machine_class->init(machine);
}
static void machine_class_finalize(ObjectClass *klass, void *data)
{
MachineClass *mc = MACHINE_CLASS(klass);

View File

@ -2335,7 +2335,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
srat->reserved1 = cpu_to_le32(1);
for (i = 0; i < apic_ids->len; i++) {
int j = numa_get_node_for_cpu(i);
int node_id = apic_ids->cpus[i].props.has_node_id ?
apic_ids->cpus[i].props.node_id : 0;
uint32_t apic_id = apic_ids->cpus[i].arch_id;
if (apic_id < 255) {
@ -2345,9 +2346,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
core->type = ACPI_SRAT_PROCESSOR_APIC;
core->length = sizeof(*core);
core->local_apic_id = apic_id;
if (j < nb_numa_nodes) {
core->proximity_lo = j;
}
core->proximity_lo = node_id;
memset(core->proximity_hi, 0, 3);
core->local_sapic_eid = 0;
core->flags = cpu_to_le32(1);
@ -2358,9 +2357,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
core->type = ACPI_SRAT_PROCESSOR_x2APIC;
core->length = sizeof(*core);
core->x2apic_id = cpu_to_le32(apic_id);
if (j < nb_numa_nodes) {
core->proximity_domain = cpu_to_le32(j);
}
core->proximity_domain = cpu_to_le32(node_id);
core->flags = cpu_to_le32(1);
}
}
@ -2707,6 +2704,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
if (pcms->numa_nodes) {
acpi_add_table(table_offsets, tables_blob);
build_srat(tables_blob, tables->linker, machine);
if (have_numa_distance) {
acpi_add_table(table_offsets, tables_blob);
build_slit(tables_blob, tables->linker);
}
}
if (acpi_get_mcfg(&mcfg)) {
acpi_add_table(table_offsets, tables_blob);

View File

@ -747,7 +747,9 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
{
FWCfgState *fw_cfg;
uint64_t *numa_fw_cfg;
int i, j;
int i;
const CPUArchIdList *cpus;
MachineClass *mc = MACHINE_GET_CLASS(pcms);
fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as);
fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
@ -782,12 +784,12 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
*/
numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes);
numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
for (i = 0; i < max_cpus; i++) {
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
cpus = mc->possible_cpu_arch_ids(MACHINE(pcms));
for (i = 0; i < cpus->len; i++) {
unsigned int apic_id = cpus->cpus[i].arch_id;
assert(apic_id < pcms->apic_id_limit);
j = numa_get_node_for_cpu(i);
if (j < nb_numa_nodes) {
numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
if (cpus->cpus[i].props.has_node_id) {
numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id);
}
}
for (i = 0; i < nb_numa_nodes; i++) {
@ -1893,6 +1895,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
int idx;
int node_id;
CPUState *cs;
CPUArchId *cpu_slot;
X86CPUTopoInfo topo;
@ -1982,6 +1985,22 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
cs = CPU(cpu);
cs->cpu_index = idx;
node_id = cpu_slot->props.node_id;
if (!cpu_slot->props.has_node_id) {
/* by default CPUState::numa_node was 0 if it's not set via CLI
* keep it this way for now but in future we probably should
* refuse to start up with incomplete numa mapping */
node_id = 0;
}
if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) {
cs->numa_node = node_id;
} else if (cs->numa_node != node_id) {
error_setg(errp, "node-id %d must match numa node specified"
"with -numa option for cpu-index %d",
cs->numa_node, cs->cpu_index);
return;
}
}
static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
@ -2243,12 +2262,14 @@ static void pc_machine_reset(void)
}
}
static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
static CpuInstanceProperties
pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
{
X86CPUTopoInfo topo;
x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index,
&topo);
return topo.pkg_id;
MachineClass *mc = MACHINE_GET_CLASS(ms);
const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
assert(cpu_index < possible_cpus->len);
return possible_cpus->cpus[cpu_index].props;
}
static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
@ -2280,6 +2301,15 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
ms->possible_cpus->cpus[i].props.has_thread_id = true;
ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id;
/* default distribution of CPUs over NUMA nodes */
if (nb_numa_nodes) {
/* preset values but do not enable them i.e. 'has_node_id = false',
* numa init code will enable them later if manual mapping wasn't
* present on CLI */
ms->possible_cpus->cpus[i].props.node_id =
topo.pkg_id % nb_numa_nodes;
}
}
return ms->possible_cpus;
}
@ -2322,7 +2352,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->save_tsc_khz = true;
mc->get_hotplug_handler = pc_get_hotpug_handler;
mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
mc->has_hotpluggable_cpus = true;
mc->default_boot_order = "cad";

View File

@ -54,6 +54,7 @@
#endif
#include "migration/migration.h"
#include "kvm_i386.h"
#include "sysemu/numa.h"
#define MAX_IDE_BUS 2
@ -442,6 +443,7 @@ static void pc_i440fx_2_9_machine_options(MachineClass *m)
pc_i440fx_machine_options(m);
m->alias = "pc";
m->is_default = 1;
m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}
DEFINE_I440FX_MACHINE(v2_9, "pc-i440fx-2.9", NULL,

View File

@ -47,6 +47,7 @@
#include "hw/usb.h"
#include "qemu/error-report.h"
#include "migration/migration.h"
#include "sysemu/numa.h"
/* ICH9 AHCI has 6 ports */
#define MAX_SATA_PORTS 6
@ -305,6 +306,7 @@ static void pc_q35_2_9_machine_options(MachineClass *m)
{
pc_q35_machine_options(m);
m->alias = "q35";
m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}
DEFINE_Q35_MACHINE(v2_9, "pc-q35-2.9", NULL,

View File

@ -2831,9 +2831,11 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
Error *local_err = NULL;
CPUCore *cc = CPU_CORE(dev);
sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev);
char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model);
const char *type = object_get_typename(OBJECT(dev));
CPUArchId *core_slot;
int node_id;
int index;
if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
@ -2868,6 +2870,21 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
goto out;
}
node_id = core_slot->props.node_id;
if (!core_slot->props.has_node_id) {
/* by default CPUState::numa_node was 0 if it's not set via CLI
* keep it this way for now but in future we probably should
* refuse to start up with incomplete numa mapping */
node_id = 0;
}
if (sc->node_id == CPU_UNSET_NUMA_NODE_ID) {
sc->node_id = node_id;
} else if (sc->node_id != node_id) {
error_setg(&local_err, "node-id %d must match numa node specified"
"with -numa option for cpu-index %d", sc->node_id, cc->core_id);
goto out;
}
out:
g_free(base_core_type);
error_propagate(errp, local_err);
@ -2988,11 +3005,18 @@ static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
return NULL;
}
static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
static CpuInstanceProperties
spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
{
/* Allocate to NUMA nodes on a "socket" basis (not that concept of
* socket means much for the paravirtualized PAPR platform) */
return cpu_index / smp_threads / smp_cores;
CPUArchId *core_slot;
MachineClass *mc = MACHINE_GET_CLASS(machine);
/* make sure possible_cpu are intialized */
mc->possible_cpu_arch_ids(machine);
/* get CPU core slot containing thread that matches cpu_index */
core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
assert(core_slot);
return core_slot->props;
}
static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
@ -3019,8 +3043,15 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
machine->possible_cpus->cpus[i].arch_id = core_id;
machine->possible_cpus->cpus[i].props.has_core_id = true;
machine->possible_cpus->cpus[i].props.core_id = core_id;
/* TODO: add 'has_node/node' here to describe
to which node core belongs */
/* default distribution of CPUs over NUMA nodes */
if (nb_numa_nodes) {
/* preset values but do not enable them i.e. 'has_node_id = false',
* numa init code will enable them later if manual mapping wasn't
* present on CLI */
machine->possible_cpus->cpus[i].props.node_id =
core_id / smp_threads / smp_cores % nb_numa_nodes;
}
}
return machine->possible_cpus;
}
@ -3145,7 +3176,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
hc->pre_plug = spapr_machine_device_pre_plug;
hc->plug = spapr_machine_device_plug;
hc->unplug = spapr_machine_device_unplug;
mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
hc->unplug_request = spapr_machine_device_unplug_request;
@ -3249,6 +3280,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc)
{
spapr_machine_2_10_class_options(mc);
SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9);
mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}
DEFINE_SPAPR_MACHINE(2_9, "2.9", false);

View File

@ -176,13 +176,11 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
const char *typename = object_class_get_name(scc->cpu_class);
size_t size = object_type_get_instance_size(typename);
Error *local_err = NULL;
int core_node_id = numa_get_node_for_cpu(cc->core_id);;
void *obj;
int i, j;
sc->threads = g_malloc0(size * cc->nr_threads);
for (i = 0; i < cc->nr_threads; i++) {
int node_id;
char id[32];
CPUState *cs;
@ -192,17 +190,8 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
cs = CPU(obj);
cs->cpu_index = cc->core_id + i;
/* Set NUMA node for the added CPUs */
node_id = numa_get_node_for_cpu(cs->cpu_index);
if (node_id != core_node_id) {
error_setg(&local_err, "Invalid node-id=%d of thread[cpu-index: %d]"
" on CPU[core-id: %d, node-id: %d], node-id must be the same",
node_id, cs->cpu_index, cc->core_id, core_node_id);
goto err;
}
if (node_id < nb_numa_nodes) {
cs->numa_node = node_id;
}
/* Set NUMA node for the threads belonged to core */
cs->numa_node = sc->node_id;
snprintf(id, sizeof(id), "thread[%d]", i);
object_property_add_child(OBJECT(sc), id, obj, &local_err);
@ -263,6 +252,11 @@ static const char *spapr_core_models[] = {
"POWER9_v1.0",
};
static Property spapr_cpu_core_properties[] = {
DEFINE_PROP_INT32("node-id", sPAPRCPUCore, node_id, CPU_UNSET_NUMA_NODE_ID),
DEFINE_PROP_END_OF_LIST()
};
void spapr_cpu_core_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@ -270,6 +264,7 @@ void spapr_cpu_core_class_init(ObjectClass *oc, void *data)
dc->realize = spapr_cpu_core_realize;
dc->unrealize = spapr_cpu_core_unrealizefn;
dc->props = spapr_cpu_core_properties;
scc->cpu_class = cpu_class_by_name(TYPE_POWERPC_CPU, data);
g_assert(scc->cpu_class);
}

View File

@ -389,4 +389,5 @@ GCC_FMT_ATTR(2, 3);
void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
uint64_t len, int node, MemoryAffinityFlags flags);
void build_slit(GArray *table_data, BIOSLinker *linker);
#endif

View File

@ -32,6 +32,7 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
MachineClass *find_default_machine(void);
extern MachineState *current_machine;
void machine_run_board_init(MachineState *machine);
bool machine_usb(MachineState *machine);
bool machine_kernel_irqchip_allowed(MachineState *machine);
bool machine_kernel_irqchip_required(MachineState *machine);
@ -42,6 +43,9 @@ bool machine_dump_guest_core(MachineState *machine);
bool machine_mem_merge(MachineState *machine);
void machine_register_compat_props(MachineState *machine);
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props,
Error **errp);
/**
* CPUArchId:
@ -74,7 +78,10 @@ typedef struct {
* of HotplugHandler object, which handles hotplug operation
* for a given @dev. It may return NULL if @dev doesn't require
* any actions to be performed by hotplug handler.
* @cpu_index_to_socket_id:
* @cpu_index_to_instance_props:
* used to provide @cpu_index to socket/core/thread number mapping, allowing
* legacy code to perform maping from cpu_index to topology properties
* Returns: tuple of socket/core/thread ids given cpu_index belongs to.
* used to provide @cpu_index to socket number mapping, allowing
* a machine to group CPU threads belonging to the same socket/package
* Returns: socket number given cpu_index belongs to.
@ -136,10 +143,13 @@ struct MachineClass {
int minimum_page_bits;
bool has_hotpluggable_cpus;
int numa_mem_align_shift;
void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
DeviceState *dev);
unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
unsigned cpu_index);
const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
};

View File

@ -27,6 +27,7 @@ typedef struct sPAPRCPUCore {
/*< public >*/
void *threads;
int node_id;
} sPAPRCPUCore;
typedef struct sPAPRCPUCoreClass {

View File

@ -97,5 +97,6 @@ typedef struct SSIBus SSIBus;
typedef struct uWireSlave uWireSlave;
typedef struct VirtIODevice VirtIODevice;
typedef struct Visitor Visitor;
typedef struct node_info NodeInfo;
#endif /* QEMU_TYPEDEFS_H */

View File

@ -258,6 +258,8 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
struct qemu_work_item;
#define CPU_UNSET_NUMA_NODE_ID -1
/**
* CPUState:
* @cpu_index: CPU index (informative).

View File

@ -8,6 +8,7 @@
#include "hw/boards.h"
extern int nb_numa_nodes; /* Number of NUMA nodes */
extern bool have_numa_distance;
struct numa_addr_range {
ram_addr_t mem_start;
@ -15,24 +16,23 @@ struct numa_addr_range {
QLIST_ENTRY(numa_addr_range) entry;
};
typedef struct node_info {
struct node_info {
uint64_t node_mem;
unsigned long *node_cpu;
struct HostMemoryBackend *node_memdev;
bool present;
QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
} NodeInfo;
uint8_t distance[MAX_NODES];
};
extern NodeInfo numa_info[MAX_NODES];
void parse_numa_opts(MachineClass *mc);
void numa_post_machine_init(void);
void parse_numa_opts(MachineState *ms);
void query_numa_node_mem(uint64_t node_mem[]);
extern QemuOptsList qemu_numa_opts;
void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
uint32_t numa_get_node(ram_addr_t addr, Error **errp);
/* on success returns node index in numa_info,
* on failure returns nb_numa_nodes */
int numa_get_node_for_cpu(int idx);
void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
#endif

View File

@ -166,6 +166,10 @@ extern int mem_prealloc;
#define MAX_NODES 128
#define NUMA_NODE_UNASSIGNED MAX_NODES
#define NUMA_DISTANCE_MIN 10
#define NUMA_DISTANCE_DEFAULT 20
#define NUMA_DISTANCE_MAX 254
#define NUMA_DISTANCE_UNREACHABLE 255
#define MAX_OPTION_ROMS 16
typedef struct QEMUOptionRom {

310
numa.c
View File

@ -51,6 +51,7 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
* For all nodes, nodeid < max_numa_nodeid
*/
int nb_numa_nodes;
bool have_numa_distance;
NodeInfo numa_info[MAX_NODES];
void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
@ -140,10 +141,12 @@ uint32_t numa_get_node(ram_addr_t addr, Error **errp)
return -1;
}
static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
QemuOpts *opts, Error **errp)
{
uint16_t nodenr;
uint16List *cpus = NULL;
MachineClass *mc = MACHINE_GET_CLASS(ms);
if (node->has_nodeid) {
nodenr = node->nodeid;
@ -162,7 +165,12 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
return;
}
if (!mc->cpu_index_to_instance_props) {
error_report("NUMA is not supported by this machine-type");
exit(1);
}
for (cpus = node->cpus; cpus; cpus = cpus->next) {
CpuInstanceProperties props;
if (cpus->value >= max_cpus) {
error_setg(errp,
"CPU index (%" PRIu16 ")"
@ -170,7 +178,10 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
cpus->value, max_cpus);
return;
}
bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
props = mc->cpu_index_to_instance_props(ms, cpus->value);
props.node_id = nodenr;
props.has_node_id = true;
machine_set_cpu_numa_node(ms, &props, &error_fatal);
}
if (node->has_mem && node->has_memdev) {
@ -212,9 +223,47 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
}
static void parse_numa_distance(NumaDistOptions *dist, Error **errp)
{
uint16_t src = dist->src;
uint16_t dst = dist->dst;
uint8_t val = dist->val;
if (src >= MAX_NODES || dst >= MAX_NODES) {
error_setg(errp,
"Invalid node %" PRIu16
", max possible could be %" PRIu16,
MAX(src, dst), MAX_NODES);
return;
}
if (!numa_info[src].present || !numa_info[dst].present) {
error_setg(errp, "Source/Destination NUMA node is missing. "
"Please use '-numa node' option to declare it first.");
return;
}
if (val < NUMA_DISTANCE_MIN) {
error_setg(errp, "NUMA distance (%" PRIu8 ") is invalid, "
"it shouldn't be less than %d.",
val, NUMA_DISTANCE_MIN);
return;
}
if (src == dst && val != NUMA_DISTANCE_MIN) {
error_setg(errp, "Local distance of node %d should be %d.",
src, NUMA_DISTANCE_MIN);
return;
}
numa_info[src].distance[dst] = val;
have_numa_distance = true;
}
static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
{
NumaOptions *object = NULL;
MachineState *ms = opaque;
Error *err = NULL;
{
@ -229,12 +278,33 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
switch (object->type) {
case NUMA_OPTIONS_TYPE_NODE:
numa_node_parse(&object->u.node, opts, &err);
parse_numa_node(ms, &object->u.node, opts, &err);
if (err) {
goto end;
}
nb_numa_nodes++;
break;
case NUMA_OPTIONS_TYPE_DIST:
parse_numa_distance(&object->u.dist, &err);
if (err) {
goto end;
}
break;
case NUMA_OPTIONS_TYPE_CPU:
if (!object->u.cpu.has_node_id) {
error_setg(&err, "Missing mandatory node-id property");
goto end;
}
if (!numa_info[object->u.cpu.node_id].present) {
error_setg(&err, "Invalid node-id=%" PRId64 ", NUMA node must be "
"defined with -numa node,nodeid=ID before it's used with "
"-numa cpu,node-id=ID", object->u.cpu.node_id);
goto end;
}
machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
&err);
break;
default:
abort();
}
@ -249,60 +319,118 @@ end:
return 0;
}
static char *enumerate_cpus(unsigned long *cpus, int max_cpus)
/* If all node pair distances are symmetric, then only distances
* in one direction are enough. If there is even one asymmetric
* pair, though, then all distances must be provided. The
* distance from a node to itself is always NUMA_DISTANCE_MIN,
* so providing it is never necessary.
*/
static void validate_numa_distance(void)
{
int cpu;
bool first = true;
GString *s = g_string_new(NULL);
int src, dst;
bool is_asymmetrical = false;
for (cpu = find_first_bit(cpus, max_cpus);
cpu < max_cpus;
cpu = find_next_bit(cpus, max_cpus, cpu + 1)) {
g_string_append_printf(s, "%s%d", first ? "" : " ", cpu);
first = false;
}
return g_string_free(s, FALSE);
}
for (src = 0; src < nb_numa_nodes; src++) {
for (dst = src; dst < nb_numa_nodes; dst++) {
if (numa_info[src].distance[dst] == 0 &&
numa_info[dst].distance[src] == 0) {
if (src != dst) {
error_report("The distance between node %d and %d is "
"missing, at least one distance value "
"between each nodes should be provided.",
src, dst);
exit(EXIT_FAILURE);
}
}
static void validate_numa_cpus(void)
{
int i;
unsigned long *seen_cpus = bitmap_new(max_cpus);
for (i = 0; i < nb_numa_nodes; i++) {
if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu, max_cpus)) {
bitmap_and(seen_cpus, seen_cpus,
numa_info[i].node_cpu, max_cpus);
error_report("CPU(s) present in multiple NUMA nodes: %s",
enumerate_cpus(seen_cpus, max_cpus));
g_free(seen_cpus);
exit(EXIT_FAILURE);
if (numa_info[src].distance[dst] != 0 &&
numa_info[dst].distance[src] != 0 &&
numa_info[src].distance[dst] !=
numa_info[dst].distance[src]) {
is_asymmetrical = true;
}
}
bitmap_or(seen_cpus, seen_cpus,
numa_info[i].node_cpu, max_cpus);
}
if (!bitmap_full(seen_cpus, max_cpus)) {
char *msg;
bitmap_complement(seen_cpus, seen_cpus, max_cpus);
msg = enumerate_cpus(seen_cpus, max_cpus);
error_report("warning: CPU(s) not present in any NUMA nodes: %s", msg);
error_report("warning: All CPU(s) up to maxcpus should be described "
"in NUMA config");
g_free(msg);
if (is_asymmetrical) {
for (src = 0; src < nb_numa_nodes; src++) {
for (dst = 0; dst < nb_numa_nodes; dst++) {
if (src != dst && numa_info[src].distance[dst] == 0) {
error_report("At least one asymmetrical pair of "
"distances is given, please provide distances "
"for both directions of all node pairs.");
exit(EXIT_FAILURE);
}
}
}
}
g_free(seen_cpus);
}
void parse_numa_opts(MachineClass *mc)
static void complete_init_numa_distance(void)
{
int src, dst;
/* Fixup NUMA distance by symmetric policy because if it is an
* asymmetric distance table, it should be a complete table and
* there would not be any missing distance except local node, which
* is verified by validate_numa_distance above.
*/
for (src = 0; src < nb_numa_nodes; src++) {
for (dst = 0; dst < nb_numa_nodes; dst++) {
if (numa_info[src].distance[dst] == 0) {
if (src == dst) {
numa_info[src].distance[dst] = NUMA_DISTANCE_MIN;
} else {
numa_info[src].distance[dst] = numa_info[dst].distance[src];
}
}
}
}
}
void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size)
{
int i;
uint64_t usedmem = 0;
for (i = 0; i < MAX_NODES; i++) {
numa_info[i].node_cpu = bitmap_new(max_cpus);
/* Align each node according to the alignment
* requirements of the machine class
*/
for (i = 0; i < nb_nodes - 1; i++) {
nodes[i].node_mem = (size / nb_nodes) &
~((1 << mc->numa_mem_align_shift) - 1);
usedmem += nodes[i].node_mem;
}
nodes[i].node_mem = size - usedmem;
}
if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, NULL, NULL)) {
void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size)
{
int i;
uint64_t usedmem = 0, node_mem;
uint64_t granularity = size / nb_nodes;
uint64_t propagate = 0;
for (i = 0; i < nb_nodes - 1; i++) {
node_mem = (granularity + propagate) &
~((1 << mc->numa_mem_align_shift) - 1);
propagate = granularity + propagate - node_mem;
nodes[i].node_mem = node_mem;
usedmem += node_mem;
}
nodes[i].node_mem = size - usedmem;
}
void parse_numa_opts(MachineState *ms)
{
int i;
const CPUArchIdList *possible_cpus;
MachineClass *mc = MACHINE_GET_CLASS(ms);
if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) {
exit(1);
}
@ -336,17 +464,8 @@ void parse_numa_opts(MachineClass *mc)
}
}
if (i == nb_numa_nodes) {
uint64_t usedmem = 0;
/* Align each node according to the alignment
* requirements of the machine class
*/
for (i = 0; i < nb_numa_nodes - 1; i++) {
numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
~((1 << mc->numa_mem_align_shift) - 1);
usedmem += numa_info[i].node_mem;
}
numa_info[i].node_mem = ram_size - usedmem;
assert(mc->numa_auto_assign_ram);
mc->numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size);
}
numa_total = 0;
@ -366,50 +485,55 @@ void parse_numa_opts(MachineClass *mc)
numa_set_mem_ranges();
for (i = 0; i < nb_numa_nodes; i++) {
if (!bitmap_empty(numa_info[i].node_cpu, max_cpus)) {
/* assign CPUs to nodes using board provided default mapping */
if (!mc->cpu_index_to_instance_props || !mc->possible_cpu_arch_ids) {
error_report("default CPUs to NUMA node mapping isn't supported");
exit(1);
}
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < possible_cpus->len; i++) {
if (possible_cpus->cpus[i].props.has_node_id) {
break;
}
}
/* Historically VCPUs were assigned in round-robin order to NUMA
* nodes. However it causes issues with guest not handling it nice
* in case where cores/threads from a multicore CPU appear on
* different nodes. So allow boards to override default distribution
* rule grouping VCPUs by socket so that VCPUs from the same socket
* would be on the same node.
*/
if (i == nb_numa_nodes) {
for (i = 0; i < max_cpus; i++) {
unsigned node_id = i % nb_numa_nodes;
if (mc->cpu_index_to_socket_id) {
node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes;
}
set_bit(i, numa_info[node_id].node_cpu);
/* no CPUs are assigned to NUMA nodes */
if (i == possible_cpus->len) {
for (i = 0; i < max_cpus; i++) {
CpuInstanceProperties props;
/* fetch default mapping from board and enable it */
props = mc->cpu_index_to_instance_props(ms, i);
props.has_node_id = true;
machine_set_cpu_numa_node(ms, &props, &error_fatal);
}
}
validate_numa_cpus();
/* QEMU needs at least all unique node pair distances to build
* the whole NUMA distance table. QEMU treats the distance table
* as symmetric by default, i.e. distance A->B == distance B->A.
* Thus, QEMU is able to complete the distance table
* initialization even though only distance A->B is provided and
* distance B->A is not. QEMU knows the distance of a node to
* itself is always 10, so A->A distances may be omitted. When
* the distances of two nodes of a pair differ, i.e. distance
* A->B != distance B->A, then that means the distance table is
* asymmetric. In this case, the distances for both directions
* of all node pairs are required.
*/
if (have_numa_distance) {
/* Validate enough NUMA distance information was provided. */
validate_numa_distance();
/* Validation succeeded, now fill in any missing distances. */
complete_init_numa_distance();
}
} else {
numa_set_mem_node_id(0, ram_size, 0);
}
}
void numa_post_machine_init(void)
{
CPUState *cpu;
int i;
CPU_FOREACH(cpu) {
for (i = 0; i < nb_numa_nodes; i++) {
assert(cpu->cpu_index < max_cpus);
if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
cpu->numa_node = i;
}
}
}
}
static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
const char *name,
uint64_t ram_size)
@ -560,20 +684,6 @@ MemdevList *qmp_query_memdev(Error **errp)
return list;
}
int numa_get_node_for_cpu(int idx)
{
int i;
assert(idx < max_cpus);
for (i = 0; i < nb_numa_nodes; i++) {
if (test_bit(idx, numa_info[i].node_cpu)) {
break;
}
}
return i;
}
void ram_block_notifier_add(RAMBlockNotifier *n)
{
QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next);

View File

@ -1325,6 +1325,9 @@
#
# @thread_id: ID of the underlying host thread
#
# @props: properties describing to which node/socket/core/thread
# virtual CPU belongs to, provided if supported by board (since 2.10)
#
# @arch: architecture of the cpu, which determines which additional fields
# will be listed (since 2.6)
#
@ -1335,7 +1338,8 @@
##
{ 'union': 'CpuInfo',
'base': {'CPU': 'int', 'current': 'bool', 'halted': 'bool',
'qom_path': 'str', 'thread_id': 'int', 'arch': 'CpuInfoArch' },
'qom_path': 'str', 'thread_id': 'int',
'*props': 'CpuInstanceProperties', 'arch': 'CpuInfoArch' },
'discriminator': 'arch',
'data': { 'x86': 'CpuInfoX86',
'sparc': 'CpuInfoSPARC',
@ -5682,10 +5686,16 @@
##
# @NumaOptionsType:
#
# @node: NUMA nodes configuration
#
# @dist: NUMA distance configuration (since 2.10)
#
# @cpu: property based CPU(s) to node mapping (Since: 2.10)
#
# Since: 2.1
##
{ 'enum': 'NumaOptionsType',
'data': [ 'node' ] }
'data': [ 'node', 'dist', 'cpu' ] }
##
# @NumaOptions:
@ -5698,7 +5708,9 @@
'base': { 'type': 'NumaOptionsType' },
'discriminator': 'type',
'data': {
'node': 'NumaNodeOptions' }}
'node': 'NumaNodeOptions',
'dist': 'NumaDistOptions',
'cpu': 'NumaCpuOptions' }}
##
# @NumaNodeOptions:
@ -5726,6 +5738,41 @@
'*mem': 'size',
'*memdev': 'str' }}
##
# @NumaDistOptions:
#
# Set the distance between 2 NUMA nodes.
#
# @src: source NUMA node.
#
# @dst: destination NUMA node.
#
# @val: NUMA distance from source node to destination node.
# When a node is unreachable from another node, set the distance
# between them to 255.
#
# Since: 2.10
##
{ 'struct': 'NumaDistOptions',
'data': {
'src': 'uint16',
'dst': 'uint16',
'val': 'uint8' }}
##
# @NumaCpuOptions:
#
# Option "-numa cpu" overrides default cpu to node mapping.
# It accepts the same set of cpu properties as returned by
# query-hotpluggable-cpus[].props, where node-id could be used to
# override default node mapping.
#
# Since: 2.10
##
{ 'struct': 'NumaCpuOptions',
'base': 'CpuInstanceProperties',
'data' : {} }
##
# @HostMemPolicy:
#

View File

@ -139,13 +139,18 @@ ETEXI
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
"-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
"-numa dist,src=source,dst=destination,val=distance\n", QEMU_ARCH_ALL)
STEXI
@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
@findex -numa
Define a NUMA node and assign RAM and VCPUs to it.
Set the NUMA distance from a source node to a destination node.
Legacy VCPU assignment uses @samp{cpus} option where
@var{firstcpu} and @var{lastcpu} are CPU indexes. Each
@samp{cpus} option represent a contiguous range of CPU indexes
(or a single VCPU if @var{lastcpu} is omitted). A non-contiguous
@ -159,6 +164,24 @@ a NUMA node:
-numa node,cpus=0-2,cpus=5
@end example
@samp{cpu} option is a new alternative to @samp{cpus} option
which uses @samp{socket-id|core-id|thread-id} properties to assign
CPU objects to a @var{node} using topology layout properties of CPU.
The set of properties is machine specific, and depends on used
machine type/@samp{smp} options. It could be queried with
@samp{hotpluggable-cpus} monitor command.
@samp{node-id} property specifies @var{node} to which CPU object
will be assigned, it's required for @var{node} to be declared
with @samp{node} option before it's used with @samp{cpu} option.
For example:
@example
-M pc \
-smp 1,sockets=2,maxcpus=2 \
-numa node,nodeid=0 -numa node,nodeid=1 \
-numa cpu,node-id=0,socket-id=0 -numa cpu,node-id=1,socket-id=1
@end example
@samp{mem} assigns a given RAM amount to a node. @samp{memdev}
assigns RAM from a given memory backend device to a node. If
@samp{mem} and @samp{memdev} are omitted in all nodes, RAM is
@ -167,6 +190,17 @@ split equally between them.
@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore,
if one node uses @samp{memdev}, all of them have to use it.
@var{source} and @var{destination} are NUMA node IDs.
@var{distance} is the NUMA distance from @var{source} to @var{destination}.
The distance from a node to itself is always 10. If any pair of nodes is
given a distance, then all pairs must be given distances. Although, when
distances are only given in one direction for each pair of nodes, then
the distances in the opposite directions are assumed to be the same. If,
however, an asymmetrical pair of distances is given for even one node
pair, then all node pairs must be provided distance values for both
directions, even when they are symmetrical. When a node is unreachable
from another node, set the pair's distance to 255.
Note that the -@option{numa} option doesn't allocate any of the
specified resources, it just assigns existing resources to NUMA
nodes. This means that one still has to use the @option{-m},

View File

@ -458,6 +458,13 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
}
}
uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz)
{
uint32_t Aff1 = idx / clustersz;
uint32_t Aff0 = idx % clustersz;
return (Aff1 << ARM_AFF1_SHIFT) | Aff0;
}
static void arm_cpu_initfn(Object *obj)
{
CPUState *cs = CPU(obj);
@ -709,9 +716,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
* so these bits always RAZ.
*/
if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) {
uint32_t Aff1 = cs->cpu_index / ARM_DEFAULT_CPUS_PER_CLUSTER;
uint32_t Aff0 = cs->cpu_index % ARM_DEFAULT_CPUS_PER_CLUSTER;
cpu->mp_affinity = (Aff1 << ARM_AFF1_SHIFT) | Aff0;
cpu->mp_affinity = arm_cpu_mp_affinity(cs->cpu_index,
ARM_DEFAULT_CPUS_PER_CLUSTER);
}
if (cpu->reset_hivecs) {
@ -1567,6 +1573,7 @@ static Property arm_cpu_properties[] = {
DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
mp_affinity, ARM64_AFFINITY_INVALID),
DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID),
DEFINE_PROP_END_OF_LIST()
};

View File

@ -710,6 +710,8 @@ static inline ARMCPU *arm_env_get_cpu(CPUARMState *env)
return container_of(env, ARMCPU, env);
}
uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz);
#define ENV_GET_CPU(e) CPU(arm_env_get_cpu(e))
#define ENV_OFFSET offsetof(ARMCPU, env)

View File

@ -2635,28 +2635,23 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
X86CPU *cpu = x86_env_get_cpu(env);
CPUState *cs = CPU(cpu);
uint32_t pkg_offset;
uint32_t limit;
/* test if maximum index reached */
if (index & 0x80000000) {
if (index > env->cpuid_xlevel) {
if (env->cpuid_xlevel2 > 0) {
/* Handle the Centaur's CPUID instruction. */
if (index > env->cpuid_xlevel2) {
index = env->cpuid_xlevel2;
} else if (index < 0xC0000000) {
index = env->cpuid_xlevel;
}
} else {
/* Intel documentation states that invalid EAX input will
* return the same information as EAX=cpuid_level
* (Intel SDM Vol. 2A - Instruction Set Reference - CPUID)
*/
index = env->cpuid_level;
}
}
/* Calculate & apply limits for different index ranges */
if (index >= 0xC0000000) {
limit = env->cpuid_xlevel2;
} else if (index >= 0x80000000) {
limit = env->cpuid_xlevel;
} else {
if (index > env->cpuid_level)
index = env->cpuid_level;
limit = env->cpuid_level;
}
if (index > limit) {
/* Intel documentation states that invalid EAX input will
* return the same information as EAX=cpuid_level
* (Intel SDM Vol. 2A - Instruction Set Reference - CPUID)
*/
index = env->cpuid_level;
}
switch(index) {
@ -3991,6 +3986,7 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1),
DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1),
#endif
DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID),
DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false),
{ .name = "hv-spinlocks", .info = &qdev_prop_spinlocks },
DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false),

View File

@ -136,178 +136,48 @@ static const VMStateDescription vmstate_mtrr_var = {
#define VMSTATE_MTRR_VARS(_field, _state, _n, _v) \
VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_mtrr_var, MTRRVar)
static int put_fpreg_error(QEMUFile *f, void *opaque, size_t size,
VMStateField *field, QJSON *vmdesc)
typedef struct x86_FPReg_tmp {
FPReg *parent;
uint64_t tmp_mant;
uint16_t tmp_exp;
} x86_FPReg_tmp;
static void fpreg_pre_save(void *opaque)
{
fprintf(stderr, "call put_fpreg() with invalid arguments\n");
exit(0);
return 0;
}
x86_FPReg_tmp *tmp = opaque;
/* XXX: add that in a FPU generic layer */
union x86_longdouble {
uint64_t mant;
uint16_t exp;
};
#define MANTD1(fp) (fp & ((1LL << 52) - 1))
#define EXPBIAS1 1023
#define EXPD1(fp) ((fp >> 52) & 0x7FF)
#define SIGND1(fp) ((fp >> 32) & 0x80000000)
static void fp64_to_fp80(union x86_longdouble *p, uint64_t temp)
{
int e;
/* mantissa */
p->mant = (MANTD1(temp) << 11) | (1LL << 63);
/* exponent + sign */
e = EXPD1(temp) - EXPBIAS1 + 16383;
e |= SIGND1(temp) >> 16;
p->exp = e;
}
static int get_fpreg(QEMUFile *f, void *opaque, size_t size,
VMStateField *field)
{
FPReg *fp_reg = opaque;
uint64_t mant;
uint16_t exp;
qemu_get_be64s(f, &mant);
qemu_get_be16s(f, &exp);
fp_reg->d = cpu_set_fp80(mant, exp);
return 0;
}
static int put_fpreg(QEMUFile *f, void *opaque, size_t size,
VMStateField *field, QJSON *vmdesc)
{
FPReg *fp_reg = opaque;
uint64_t mant;
uint16_t exp;
/* we save the real CPU data (in case of MMX usage only 'mant'
contains the MMX register */
cpu_get_fp80(&mant, &exp, fp_reg->d);
qemu_put_be64s(f, &mant);
qemu_put_be16s(f, &exp);
cpu_get_fp80(&tmp->tmp_mant, &tmp->tmp_exp, tmp->parent->d);
}
static int fpreg_post_load(void *opaque, int version)
{
x86_FPReg_tmp *tmp = opaque;
tmp->parent->d = cpu_set_fp80(tmp->tmp_mant, tmp->tmp_exp);
return 0;
}
static const VMStateInfo vmstate_fpreg = {
static const VMStateDescription vmstate_fpreg_tmp = {
.name = "fpreg_tmp",
.post_load = fpreg_post_load,
.pre_save = fpreg_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT64(tmp_mant, x86_FPReg_tmp),
VMSTATE_UINT16(tmp_exp, x86_FPReg_tmp),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_fpreg = {
.name = "fpreg",
.get = get_fpreg,
.put = put_fpreg,
.fields = (VMStateField[]) {
VMSTATE_WITH_TMP(FPReg, x86_FPReg_tmp, vmstate_fpreg_tmp),
VMSTATE_END_OF_LIST()
}
};
static int get_fpreg_1_mmx(QEMUFile *f, void *opaque, size_t size,
VMStateField *field)
{
union x86_longdouble *p = opaque;
uint64_t mant;
qemu_get_be64s(f, &mant);
p->mant = mant;
p->exp = 0xffff;
return 0;
}
static const VMStateInfo vmstate_fpreg_1_mmx = {
.name = "fpreg_1_mmx",
.get = get_fpreg_1_mmx,
.put = put_fpreg_error,
};
static int get_fpreg_1_no_mmx(QEMUFile *f, void *opaque, size_t size,
VMStateField *field)
{
union x86_longdouble *p = opaque;
uint64_t mant;
qemu_get_be64s(f, &mant);
fp64_to_fp80(p, mant);
return 0;
}
static const VMStateInfo vmstate_fpreg_1_no_mmx = {
.name = "fpreg_1_no_mmx",
.get = get_fpreg_1_no_mmx,
.put = put_fpreg_error,
};
static bool fpregs_is_0(void *opaque, int version_id)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
return (env->fpregs_format_vmstate == 0);
}
static bool fpregs_is_1_mmx(void *opaque, int version_id)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
int guess_mmx;
guess_mmx = ((env->fptag_vmstate == 0xff) &&
(env->fpus_vmstate & 0x3800) == 0);
return (guess_mmx && (env->fpregs_format_vmstate == 1));
}
static bool fpregs_is_1_no_mmx(void *opaque, int version_id)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
int guess_mmx;
guess_mmx = ((env->fptag_vmstate == 0xff) &&
(env->fpus_vmstate & 0x3800) == 0);
return (!guess_mmx && (env->fpregs_format_vmstate == 1));
}
#define VMSTATE_FP_REGS(_field, _state, _n) \
VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_0, vmstate_fpreg, FPReg), \
VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_1_mmx, vmstate_fpreg_1_mmx, FPReg), \
VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_1_no_mmx, vmstate_fpreg_1_no_mmx, FPReg)
static bool version_is_5(void *opaque, int version_id)
{
return version_id == 5;
}
#ifdef TARGET_X86_64
static bool less_than_7(void *opaque, int version_id)
{
return version_id < 7;
}
static int get_uint64_as_uint32(QEMUFile *f, void *pv, size_t size,
VMStateField *field)
{
uint64_t *v = pv;
*v = qemu_get_be32(f);
return 0;
}
static int put_uint64_as_uint32(QEMUFile *f, void *pv, size_t size,
VMStateField *field, QJSON *vmdesc)
{
uint64_t *v = pv;
qemu_put_be32(f, *v);
return 0;
}
static const VMStateInfo vmstate_hack_uint64_as_uint32 = {
.name = "uint64_as_uint32",
.get = get_uint64_as_uint32,
.put = put_uint64_as_uint32,
};
#define VMSTATE_HACK_UINT32(_f, _s, _t) \
VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_hack_uint64_as_uint32, uint64_t)
#endif
static void cpu_pre_save(void *opaque)
{
X86CPU *cpu = opaque;
@ -356,6 +226,10 @@ static int cpu_post_load(void *opaque, int version_id)
return -EINVAL;
}
if (env->fpregs_format_vmstate) {
error_report("Unsupported old non-softfloat CPU state");
return -EINVAL;
}
/*
* Real mode guest segments register DPL should be zero.
* Older KVM version were setting it wrongly.
@ -930,7 +804,7 @@ static const VMStateDescription vmstate_mcg_ext_ctl = {
VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
.minimum_version_id = 3,
.minimum_version_id = 11,
.pre_save = cpu_pre_save,
.post_load = cpu_post_load,
.fields = (VMStateField[]) {
@ -943,7 +817,8 @@ VMStateDescription vmstate_x86_cpu = {
VMSTATE_UINT16(env.fpus_vmstate, X86CPU),
VMSTATE_UINT16(env.fptag_vmstate, X86CPU),
VMSTATE_UINT16(env.fpregs_format_vmstate, X86CPU),
VMSTATE_FP_REGS(env.fpregs, X86CPU, 8),
VMSTATE_STRUCT_ARRAY(env.fpregs, X86CPU, 8, 0, vmstate_fpreg, FPReg),
VMSTATE_SEGMENT_ARRAY(env.segs, X86CPU, 6),
VMSTATE_SEGMENT(env.ldt, X86CPU),
@ -952,16 +827,8 @@ VMStateDescription vmstate_x86_cpu = {
VMSTATE_SEGMENT(env.idt, X86CPU),
VMSTATE_UINT32(env.sysenter_cs, X86CPU),
#ifdef TARGET_X86_64
/* Hack: In v7 size changed from 32 to 64 bits on x86_64 */
VMSTATE_HACK_UINT32(env.sysenter_esp, X86CPU, less_than_7),
VMSTATE_HACK_UINT32(env.sysenter_eip, X86CPU, less_than_7),
VMSTATE_UINTTL_V(env.sysenter_esp, X86CPU, 7),
VMSTATE_UINTTL_V(env.sysenter_eip, X86CPU, 7),
#else
VMSTATE_UINTTL(env.sysenter_esp, X86CPU),
VMSTATE_UINTTL(env.sysenter_eip, X86CPU),
#endif
VMSTATE_UINTTL(env.cr[0], X86CPU),
VMSTATE_UINTTL(env.cr[2], X86CPU),
@ -982,46 +849,45 @@ VMStateDescription vmstate_x86_cpu = {
VMSTATE_UINT64(env.fmask, X86CPU),
VMSTATE_UINT64(env.kernelgsbase, X86CPU),
#endif
VMSTATE_UINT32_V(env.smbase, X86CPU, 4),
VMSTATE_UINT32(env.smbase, X86CPU),
VMSTATE_UINT64_V(env.pat, X86CPU, 5),
VMSTATE_UINT32_V(env.hflags2, X86CPU, 5),
VMSTATE_UINT64(env.pat, X86CPU),
VMSTATE_UINT32(env.hflags2, X86CPU),
VMSTATE_UINT32_TEST(parent_obj.halted, X86CPU, version_is_5),
VMSTATE_UINT64_V(env.vm_hsave, X86CPU, 5),
VMSTATE_UINT64_V(env.vm_vmcb, X86CPU, 5),
VMSTATE_UINT64_V(env.tsc_offset, X86CPU, 5),
VMSTATE_UINT64_V(env.intercept, X86CPU, 5),
VMSTATE_UINT16_V(env.intercept_cr_read, X86CPU, 5),
VMSTATE_UINT16_V(env.intercept_cr_write, X86CPU, 5),
VMSTATE_UINT16_V(env.intercept_dr_read, X86CPU, 5),
VMSTATE_UINT16_V(env.intercept_dr_write, X86CPU, 5),
VMSTATE_UINT32_V(env.intercept_exceptions, X86CPU, 5),
VMSTATE_UINT8_V(env.v_tpr, X86CPU, 5),
VMSTATE_UINT64(env.vm_hsave, X86CPU),
VMSTATE_UINT64(env.vm_vmcb, X86CPU),
VMSTATE_UINT64(env.tsc_offset, X86CPU),
VMSTATE_UINT64(env.intercept, X86CPU),
VMSTATE_UINT16(env.intercept_cr_read, X86CPU),
VMSTATE_UINT16(env.intercept_cr_write, X86CPU),
VMSTATE_UINT16(env.intercept_dr_read, X86CPU),
VMSTATE_UINT16(env.intercept_dr_write, X86CPU),
VMSTATE_UINT32(env.intercept_exceptions, X86CPU),
VMSTATE_UINT8(env.v_tpr, X86CPU),
/* MTRRs */
VMSTATE_UINT64_ARRAY_V(env.mtrr_fixed, X86CPU, 11, 8),
VMSTATE_UINT64_V(env.mtrr_deftype, X86CPU, 8),
VMSTATE_UINT64_ARRAY(env.mtrr_fixed, X86CPU, 11),
VMSTATE_UINT64(env.mtrr_deftype, X86CPU),
VMSTATE_MTRR_VARS(env.mtrr_var, X86CPU, MSR_MTRRcap_VCNT, 8),
/* KVM-related states */
VMSTATE_INT32_V(env.interrupt_injected, X86CPU, 9),
VMSTATE_UINT32_V(env.mp_state, X86CPU, 9),
VMSTATE_UINT64_V(env.tsc, X86CPU, 9),
VMSTATE_INT32_V(env.exception_injected, X86CPU, 11),
VMSTATE_UINT8_V(env.soft_interrupt, X86CPU, 11),
VMSTATE_UINT8_V(env.nmi_injected, X86CPU, 11),
VMSTATE_UINT8_V(env.nmi_pending, X86CPU, 11),
VMSTATE_UINT8_V(env.has_error_code, X86CPU, 11),
VMSTATE_UINT32_V(env.sipi_vector, X86CPU, 11),
VMSTATE_INT32(env.interrupt_injected, X86CPU),
VMSTATE_UINT32(env.mp_state, X86CPU),
VMSTATE_UINT64(env.tsc, X86CPU),
VMSTATE_INT32(env.exception_injected, X86CPU),
VMSTATE_UINT8(env.soft_interrupt, X86CPU),
VMSTATE_UINT8(env.nmi_injected, X86CPU),
VMSTATE_UINT8(env.nmi_pending, X86CPU),
VMSTATE_UINT8(env.has_error_code, X86CPU),
VMSTATE_UINT32(env.sipi_vector, X86CPU),
/* MCE */
VMSTATE_UINT64_V(env.mcg_cap, X86CPU, 10),
VMSTATE_UINT64_V(env.mcg_status, X86CPU, 10),
VMSTATE_UINT64_V(env.mcg_ctl, X86CPU, 10),
VMSTATE_UINT64_ARRAY_V(env.mce_banks, X86CPU, MCE_BANKS_DEF * 4, 10),
VMSTATE_UINT64(env.mcg_cap, X86CPU),
VMSTATE_UINT64(env.mcg_status, X86CPU),
VMSTATE_UINT64(env.mcg_ctl, X86CPU),
VMSTATE_UINT64_ARRAY(env.mce_banks, X86CPU, MCE_BANKS_DEF * 4),
/* rdtscp */
VMSTATE_UINT64_V(env.tsc_aux, X86CPU, 11),
VMSTATE_UINT64(env.tsc_aux, X86CPU),
/* KVM pvclock msr */
VMSTATE_UINT64_V(env.system_time_msr, X86CPU, 11),
VMSTATE_UINT64_V(env.wall_clock_msr, X86CPU, 11),
VMSTATE_UINT64(env.system_time_msr, X86CPU),
VMSTATE_UINT64(env.wall_clock_msr, X86CPU),
/* XSAVE related fields */
VMSTATE_UINT64_V(env.xcr0, X86CPU, 12),
VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 12),

View File

@ -260,6 +260,7 @@ check-qtest-i386-y += tests/test-filter-mirror$(EXESUF)
check-qtest-i386-y += tests/test-filter-redirector$(EXESUF)
check-qtest-i386-y += tests/postcopy-test$(EXESUF)
check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF)
check-qtest-i386-y += tests/numa-test$(EXESUF)
check-qtest-x86_64-y += $(check-qtest-i386-y)
gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@ -300,6 +301,7 @@ check-qtest-ppc64-y += tests/test-netfilter$(EXESUF)
check-qtest-ppc64-y += tests/test-filter-mirror$(EXESUF)
check-qtest-ppc64-y += tests/test-filter-redirector$(EXESUF)
check-qtest-ppc64-y += tests/display-vga-test$(EXESUF)
check-qtest-ppc64-y += tests/numa-test$(EXESUF)
check-qtest-ppc64-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF)
check-qtest-sh4-y = tests/endianness-test$(EXESUF)
@ -324,6 +326,8 @@ gcov-files-arm-y += arm-softmmu/hw/block/virtio-blk.c
check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF)
gcov-files-arm-y += hw/timer/arm_mptimer.c
check-qtest-aarch64-y = tests/numa-test$(EXESUF)
check-qtest-microblazeel-y = $(check-qtest-microblaze-y)
check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)
@ -753,6 +757,7 @@ tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-use
tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y)
tests/numa-test$(EXESUF): tests/numa-test.o
tests/migration/stress$(EXESUF): tests/migration/stress.o
$(call quiet-command, $(LINKPROG) -static -O3 $(PTHREAD_LIB) -o $@ $< ,"LINK","$(TARGET_DIR)$@")

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -723,7 +723,8 @@ static void test_acpi_piix4_tcg_cphp(void)
data.machine = MACHINE_PC;
data.variant = ".cphp";
test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6"
" -numa node -numa node",
" -numa node -numa node"
" -numa dist,src=0,dst=1,val=21",
&data);
free_test_data(&data);
}
@ -736,7 +737,8 @@ static void test_acpi_q35_tcg_cphp(void)
data.machine = MACHINE_Q35;
data.variant = ".cphp";
test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6"
" -numa node -numa node",
" -numa node -numa node"
" -numa dist,src=0,dst=1,val=21",
&data);
free_test_data(&data);
}
@ -785,7 +787,10 @@ static void test_acpi_q35_tcg_memhp(void)
memset(&data, 0, sizeof(data));
data.machine = MACHINE_Q35;
data.variant = ".memhp";
test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data);
test_acpi_one(" -m 128,slots=3,maxmem=1G"
" -numa node -numa node"
" -numa dist,src=0,dst=1,val=21",
&data);
free_test_data(&data);
}
@ -796,7 +801,10 @@ static void test_acpi_piix4_tcg_memhp(void)
memset(&data, 0, sizeof(data));
data.machine = MACHINE_PC;
data.variant = ".memhp";
test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data);
test_acpi_one(" -m 128,slots=3,maxmem=1G"
" -numa node -numa node"
" -numa dist,src=0,dst=1,val=21",
&data);
free_test_data(&data);
}

302
tests/numa-test.c Normal file
View File

@ -0,0 +1,302 @@
/*
* NUMA configuration test cases
*
* Copyright (c) 2017 Red Hat Inc.
* Authors:
* Igor Mammedov <imammedo@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "libqtest.h"
static char *make_cli(const char *generic_cli, const char *test_cli)
{
return g_strdup_printf("%s %s", generic_cli ? generic_cli : "", test_cli);
}
static char *hmp_info_numa(void)
{
QDict *resp;
char *s;
resp = qmp("{ 'execute': 'human-monitor-command', 'arguments': "
"{ 'command-line': 'info numa '} }");
g_assert(resp);
g_assert(qdict_haskey(resp, "return"));
s = g_strdup(qdict_get_str(resp, "return"));
g_assert(s);
QDECREF(resp);
return s;
}
static void test_mon_explicit(const void *data)
{
char *s;
char *cli;
cli = make_cli(data, "-smp 8 "
"-numa node,nodeid=0,cpus=0-3 "
"-numa node,nodeid=1,cpus=4-7 ");
qtest_start(cli);
s = hmp_info_numa();
g_assert(strstr(s, "node 0 cpus: 0 1 2 3"));
g_assert(strstr(s, "node 1 cpus: 4 5 6 7"));
g_free(s);
qtest_end();
g_free(cli);
}
static void test_mon_default(const void *data)
{
char *s;
char *cli;
cli = make_cli(data, "-smp 8 -numa node -numa node");
qtest_start(cli);
s = hmp_info_numa();
g_assert(strstr(s, "node 0 cpus: 0 2 4 6"));
g_assert(strstr(s, "node 1 cpus: 1 3 5 7"));
g_free(s);
qtest_end();
g_free(cli);
}
static void test_mon_partial(const void *data)
{
char *s;
char *cli;
cli = make_cli(data, "-smp 8 "
"-numa node,nodeid=0,cpus=0-1 "
"-numa node,nodeid=1,cpus=4-5 ");
qtest_start(cli);
s = hmp_info_numa();
g_assert(strstr(s, "node 0 cpus: 0 1 2 3 6 7"));
g_assert(strstr(s, "node 1 cpus: 4 5"));
g_free(s);
qtest_end();
g_free(cli);
}
static QList *get_cpus(QDict **resp)
{
*resp = qmp("{ 'execute': 'query-cpus' }");
g_assert(*resp);
g_assert(qdict_haskey(*resp, "return"));
return qdict_get_qlist(*resp, "return");
}
static void test_query_cpus(const void *data)
{
char *cli;
QDict *resp;
QList *cpus;
const QObject *e;
cli = make_cli(data, "-smp 8 -numa node,cpus=0-3 -numa node,cpus=4-7");
qtest_start(cli);
cpus = get_cpus(&resp);
g_assert(cpus);
while ((e = qlist_pop(cpus))) {
QDict *cpu, *props;
int64_t cpu_idx, node;
cpu = qobject_to_qdict(e);
g_assert(qdict_haskey(cpu, "CPU"));
g_assert(qdict_haskey(cpu, "props"));
cpu_idx = qdict_get_int(cpu, "CPU");
props = qdict_get_qdict(cpu, "props");
g_assert(qdict_haskey(props, "node-id"));
node = qdict_get_int(props, "node-id");
if (cpu_idx >= 0 && cpu_idx < 4) {
g_assert_cmpint(node, ==, 0);
} else {
g_assert_cmpint(node, ==, 1);
}
}
QDECREF(resp);
qtest_end();
g_free(cli);
}
static void pc_numa_cpu(const void *data)
{
char *cli;
QDict *resp;
QList *cpus;
const QObject *e;
cli = make_cli(data, "-cpu pentium -smp 8,sockets=2,cores=2,threads=2 "
"-numa node,nodeid=0 -numa node,nodeid=1 "
"-numa cpu,node-id=1,socket-id=0 "
"-numa cpu,node-id=0,socket-id=1,core-id=0 "
"-numa cpu,node-id=0,socket-id=1,core-id=1,thread-id=0 "
"-numa cpu,node-id=1,socket-id=1,core-id=1,thread-id=1");
qtest_start(cli);
cpus = get_cpus(&resp);
g_assert(cpus);
while ((e = qlist_pop(cpus))) {
QDict *cpu, *props;
int64_t socket, core, thread, node;
cpu = qobject_to_qdict(e);
g_assert(qdict_haskey(cpu, "props"));
props = qdict_get_qdict(cpu, "props");
g_assert(qdict_haskey(props, "node-id"));
node = qdict_get_int(props, "node-id");
g_assert(qdict_haskey(props, "socket-id"));
socket = qdict_get_int(props, "socket-id");
g_assert(qdict_haskey(props, "core-id"));
core = qdict_get_int(props, "core-id");
g_assert(qdict_haskey(props, "thread-id"));
thread = qdict_get_int(props, "thread-id");
if (socket == 0) {
g_assert_cmpint(node, ==, 1);
} else if (socket == 1 && core == 0) {
g_assert_cmpint(node, ==, 0);
} else if (socket == 1 && core == 1 && thread == 0) {
g_assert_cmpint(node, ==, 0);
} else if (socket == 1 && core == 1 && thread == 1) {
g_assert_cmpint(node, ==, 1);
} else {
g_assert(false);
}
}
QDECREF(resp);
qtest_end();
g_free(cli);
}
static void spapr_numa_cpu(const void *data)
{
char *cli;
QDict *resp;
QList *cpus;
const QObject *e;
cli = make_cli(data, "-smp 4,cores=4 "
"-numa node,nodeid=0 -numa node,nodeid=1 "
"-numa cpu,node-id=0,core-id=0 "
"-numa cpu,node-id=0,core-id=1 "
"-numa cpu,node-id=0,core-id=2 "
"-numa cpu,node-id=1,core-id=3");
qtest_start(cli);
cpus = get_cpus(&resp);
g_assert(cpus);
while ((e = qlist_pop(cpus))) {
QDict *cpu, *props;
int64_t core, node;
cpu = qobject_to_qdict(e);
g_assert(qdict_haskey(cpu, "props"));
props = qdict_get_qdict(cpu, "props");
g_assert(qdict_haskey(props, "node-id"));
node = qdict_get_int(props, "node-id");
g_assert(qdict_haskey(props, "core-id"));
core = qdict_get_int(props, "core-id");
if (core >= 0 && core < 3) {
g_assert_cmpint(node, ==, 0);
} else if (core == 3) {
g_assert_cmpint(node, ==, 1);
} else {
g_assert(false);
}
}
QDECREF(resp);
qtest_end();
g_free(cli);
}
static void aarch64_numa_cpu(const void *data)
{
char *cli;
QDict *resp;
QList *cpus;
const QObject *e;
cli = make_cli(data, "-smp 2 "
"-numa node,nodeid=0 -numa node,nodeid=1 "
"-numa cpu,node-id=1,thread-id=0 "
"-numa cpu,node-id=0,thread-id=1");
qtest_start(cli);
cpus = get_cpus(&resp);
g_assert(cpus);
while ((e = qlist_pop(cpus))) {
QDict *cpu, *props;
int64_t thread, node;
cpu = qobject_to_qdict(e);
g_assert(qdict_haskey(cpu, "props"));
props = qdict_get_qdict(cpu, "props");
g_assert(qdict_haskey(props, "node-id"));
node = qdict_get_int(props, "node-id");
g_assert(qdict_haskey(props, "thread-id"));
thread = qdict_get_int(props, "thread-id");
if (thread == 0) {
g_assert_cmpint(node, ==, 1);
} else if (thread == 1) {
g_assert_cmpint(node, ==, 0);
} else {
g_assert(false);
}
}
QDECREF(resp);
qtest_end();
g_free(cli);
}
int main(int argc, char **argv)
{
const char *args = NULL;
const char *arch = qtest_get_arch();
if (strcmp(arch, "aarch64") == 0) {
args = "-machine virt";
}
g_test_init(&argc, &argv, NULL);
qtest_add_data_func("/numa/mon/default", args, test_mon_default);
qtest_add_data_func("/numa/mon/cpus/explicit", args, test_mon_explicit);
qtest_add_data_func("/numa/mon/cpus/partial", args, test_mon_partial);
qtest_add_data_func("/numa/qmp/cpus/query-cpus", args, test_query_cpus);
if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) {
qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu);
}
if (!strcmp(arch, "ppc64")) {
qtest_add_data_func("/numa/spapr/cpu/explicit", args, spapr_numa_cpu);
}
if (!strcmp(arch, "aarch64")) {
qtest_add_data_func("/numa/aarch64/cpu/explicit", args,
aarch64_numa_cpu);
}
return g_test_run();
}

6
vl.c
View File

@ -4504,7 +4504,7 @@ int main(int argc, char **argv, char **envp)
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
parse_numa_opts(machine_class);
parse_numa_opts(current_machine);
if (qemu_opts_foreach(qemu_find_opts("mon"),
mon_init_func, NULL, NULL)) {
@ -4560,7 +4560,7 @@ int main(int argc, char **argv, char **envp)
current_machine->boot_order = boot_order;
current_machine->cpu_model = cpu_model;
machine_class->init(current_machine);
machine_run_board_init(current_machine);
realtime_init();
@ -4593,8 +4593,6 @@ int main(int argc, char **argv, char **envp)
cpu_synchronize_all_post_init();
numa_post_machine_init();
rom_reset_order_override();
/*