NUMA: Add numa_info structure to contain numa nodes info
Add the numa_info structure to hold each NUMA node's memory and VCPU information, as well as the host memory policies that will be added for NUMA nodes in the future.

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
[Fix hw/ppc/spapr.c - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
parent
2b631ec255
commit
8c85901ed3
12
hw/i386/pc.c
12
hw/i386/pc.c
@ -704,14 +704,14 @@ static FWCfgState *bochs_bios_init(void)
|
|||||||
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
|
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
|
||||||
assert(apic_id < apic_id_limit);
|
assert(apic_id < apic_id_limit);
|
||||||
for (j = 0; j < nb_numa_nodes; j++) {
|
for (j = 0; j < nb_numa_nodes; j++) {
|
||||||
if (test_bit(i, node_cpumask[j])) {
|
if (test_bit(i, numa_info[j].node_cpu)) {
|
||||||
numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
|
numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (i = 0; i < nb_numa_nodes; i++) {
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
|
numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
|
||||||
}
|
}
|
||||||
fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
|
fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
|
||||||
(1 + apic_id_limit + nb_numa_nodes) *
|
(1 + apic_id_limit + nb_numa_nodes) *
|
||||||
@ -1122,8 +1122,12 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
|
|||||||
guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
|
guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
|
||||||
guest_info->apic_xrupt_override = kvm_allows_irq0_override();
|
guest_info->apic_xrupt_override = kvm_allows_irq0_override();
|
||||||
guest_info->numa_nodes = nb_numa_nodes;
|
guest_info->numa_nodes = nb_numa_nodes;
|
||||||
guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes *
|
guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
|
||||||
sizeof *guest_info->node_mem);
|
sizeof *guest_info->node_mem);
|
||||||
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
|
guest_info->node_mem[i] = numa_info[i].node_mem;
|
||||||
|
}
|
||||||
|
|
||||||
guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
|
guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit *
|
||||||
sizeof *guest_info->node_cpu);
|
sizeof *guest_info->node_cpu);
|
||||||
|
|
||||||
@ -1131,7 +1135,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
|
|||||||
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
|
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
|
||||||
assert(apic_id < guest_info->apic_id_limit);
|
assert(apic_id < guest_info->apic_id_limit);
|
||||||
for (j = 0; j < nb_numa_nodes; j++) {
|
for (j = 0; j < nb_numa_nodes; j++) {
|
||||||
if (test_bit(i, node_cpumask[j])) {
|
if (test_bit(i, numa_info[j].node_cpu)) {
|
||||||
guest_info->node_cpu[apic_id] = j;
|
guest_info->node_cpu[apic_id] = j;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -673,8 +673,8 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
|
|||||||
int i, off;
|
int i, off;
|
||||||
|
|
||||||
/* memory node(s) */
|
/* memory node(s) */
|
||||||
if (nb_numa_nodes > 1 && node_mem[0] < ram_size) {
|
if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
|
||||||
node0_size = node_mem[0];
|
node0_size = numa_info[0].node_mem;
|
||||||
} else {
|
} else {
|
||||||
node0_size = ram_size;
|
node0_size = ram_size;
|
||||||
}
|
}
|
||||||
@ -712,7 +712,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
|
|||||||
if (mem_start >= ram_size) {
|
if (mem_start >= ram_size) {
|
||||||
node_size = 0;
|
node_size = 0;
|
||||||
} else {
|
} else {
|
||||||
node_size = node_mem[i];
|
node_size = numa_info[i].node_mem;
|
||||||
if (node_size > ram_size - mem_start) {
|
if (node_size > ram_size - mem_start) {
|
||||||
node_size = ram_size - mem_start;
|
node_size = ram_size - mem_start;
|
||||||
}
|
}
|
||||||
@ -857,7 +857,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
|
|||||||
|
|
||||||
/* Update the RMA size if necessary */
|
/* Update the RMA size if necessary */
|
||||||
if (spapr->vrma_adjust) {
|
if (spapr->vrma_adjust) {
|
||||||
hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
|
hwaddr node0_size = (nb_numa_nodes > 1) ?
|
||||||
|
numa_info[0].node_mem : ram_size;
|
||||||
spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
|
spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1289,7 +1290,7 @@ static void ppc_spapr_init(MachineState *machine)
|
|||||||
MemoryRegion *sysmem = get_system_memory();
|
MemoryRegion *sysmem = get_system_memory();
|
||||||
MemoryRegion *ram = g_new(MemoryRegion, 1);
|
MemoryRegion *ram = g_new(MemoryRegion, 1);
|
||||||
hwaddr rma_alloc_size;
|
hwaddr rma_alloc_size;
|
||||||
hwaddr node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
|
hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
|
||||||
uint32_t initrd_base = 0;
|
uint32_t initrd_base = 0;
|
||||||
long kernel_size = 0, initrd_size = 0;
|
long kernel_size = 0, initrd_size = 0;
|
||||||
long load_limit, rtas_limit, fw_size;
|
long load_limit, rtas_limit, fw_size;
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include "qapi-types.h"
|
#include "qapi-types.h"
|
||||||
#include "qemu/notify.h"
|
#include "qemu/notify.h"
|
||||||
#include "qemu/main-loop.h"
|
#include "qemu/main-loop.h"
|
||||||
|
#include "qemu/bitmap.h"
|
||||||
|
|
||||||
/* vl.c */
|
/* vl.c */
|
||||||
|
|
||||||
@ -142,8 +143,11 @@ extern QEMUClockType rtc_clock;
|
|||||||
#define MAX_CPUMASK_BITS 255
|
#define MAX_CPUMASK_BITS 255
|
||||||
|
|
||||||
extern int nb_numa_nodes;
|
extern int nb_numa_nodes;
|
||||||
extern uint64_t node_mem[MAX_NODES];
|
typedef struct node_info {
|
||||||
extern unsigned long *node_cpumask[MAX_NODES];
|
uint64_t node_mem;
|
||||||
|
DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
|
||||||
|
} NodeInfo;
|
||||||
|
extern NodeInfo numa_info[MAX_NODES];
|
||||||
void numa_add(const char *optarg);
|
void numa_add(const char *optarg);
|
||||||
void set_numa_nodes(void);
|
void set_numa_nodes(void);
|
||||||
void set_numa_modes(void);
|
void set_numa_modes(void);
|
||||||
|
@ -2011,7 +2011,7 @@ static void do_info_numa(Monitor *mon, const QDict *qdict)
|
|||||||
}
|
}
|
||||||
monitor_printf(mon, "\n");
|
monitor_printf(mon, "\n");
|
||||||
monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
|
monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i,
|
||||||
node_mem[i] >> 20);
|
numa_info[i].node_mem >> 20);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
23
numa.c
23
numa.c
@ -65,7 +65,7 @@ static void numa_node_parse_cpus(int nodenr, const char *cpus)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
|
bitmap_set(numa_info[nodenr].node_cpu, value, endvalue-value+1);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
@ -105,7 +105,7 @@ void numa_add(const char *optarg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (get_param_value(option, 128, "mem", optarg) == 0) {
|
if (get_param_value(option, 128, "mem", optarg) == 0) {
|
||||||
node_mem[nodenr] = 0;
|
numa_info[nodenr].node_mem = 0;
|
||||||
} else {
|
} else {
|
||||||
int64_t sval;
|
int64_t sval;
|
||||||
sval = strtosz(option, &endptr);
|
sval = strtosz(option, &endptr);
|
||||||
@ -113,7 +113,7 @@ void numa_add(const char *optarg)
|
|||||||
fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
|
fprintf(stderr, "qemu: invalid numa mem size: %s\n", optarg);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
node_mem[nodenr] = sval;
|
numa_info[nodenr].node_mem = sval;
|
||||||
}
|
}
|
||||||
if (get_param_value(option, 128, "cpus", optarg) != 0) {
|
if (get_param_value(option, 128, "cpus", optarg) != 0) {
|
||||||
numa_node_parse_cpus(nodenr, option);
|
numa_node_parse_cpus(nodenr, option);
|
||||||
@ -139,7 +139,7 @@ void set_numa_nodes(void)
|
|||||||
* and distribute the available memory equally across all nodes
|
* and distribute the available memory equally across all nodes
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < nb_numa_nodes; i++) {
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
if (node_mem[i] != 0) {
|
if (numa_info[i].node_mem != 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -150,15 +150,16 @@ void set_numa_nodes(void)
|
|||||||
* the final node gets the rest.
|
* the final node gets the rest.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < nb_numa_nodes - 1; i++) {
|
for (i = 0; i < nb_numa_nodes - 1; i++) {
|
||||||
node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
|
numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
|
||||||
usedmem += node_mem[i];
|
~((1 << 23UL) - 1);
|
||||||
|
usedmem += numa_info[i].node_mem;
|
||||||
}
|
}
|
||||||
node_mem[i] = ram_size - usedmem;
|
numa_info[i].node_mem = ram_size - usedmem;
|
||||||
}
|
}
|
||||||
|
|
||||||
numa_total = 0;
|
numa_total = 0;
|
||||||
for (i = 0; i < nb_numa_nodes; i++) {
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
numa_total += node_mem[i];
|
numa_total += numa_info[i].node_mem;
|
||||||
}
|
}
|
||||||
if (numa_total != ram_size) {
|
if (numa_total != ram_size) {
|
||||||
error_report("total memory for NUMA nodes (%" PRIu64 ")"
|
error_report("total memory for NUMA nodes (%" PRIu64 ")"
|
||||||
@ -168,7 +169,7 @@ void set_numa_nodes(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < nb_numa_nodes; i++) {
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
|
if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -178,7 +179,7 @@ void set_numa_nodes(void)
|
|||||||
*/
|
*/
|
||||||
if (i == nb_numa_nodes) {
|
if (i == nb_numa_nodes) {
|
||||||
for (i = 0; i < max_cpus; i++) {
|
for (i = 0; i < max_cpus; i++) {
|
||||||
set_bit(i, node_cpumask[i % nb_numa_nodes]);
|
set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -191,7 +192,7 @@ void set_numa_modes(void)
|
|||||||
|
|
||||||
CPU_FOREACH(cpu) {
|
CPU_FOREACH(cpu) {
|
||||||
for (i = 0; i < nb_numa_nodes; i++) {
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
if (test_bit(cpu->cpu_index, node_cpumask[i])) {
|
if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
|
||||||
cpu->numa_node = i;
|
cpu->numa_node = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
7
vl.c
7
vl.c
@ -195,8 +195,7 @@ static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
|
|||||||
QTAILQ_HEAD_INITIALIZER(fw_boot_order);
|
QTAILQ_HEAD_INITIALIZER(fw_boot_order);
|
||||||
|
|
||||||
int nb_numa_nodes;
|
int nb_numa_nodes;
|
||||||
uint64_t node_mem[MAX_NODES];
|
NodeInfo numa_info[MAX_NODES];
|
||||||
unsigned long *node_cpumask[MAX_NODES];
|
|
||||||
|
|
||||||
uint8_t qemu_uuid[16];
|
uint8_t qemu_uuid[16];
|
||||||
bool qemu_uuid_set;
|
bool qemu_uuid_set;
|
||||||
@ -2959,8 +2958,8 @@ int main(int argc, char **argv, char **envp)
|
|||||||
translation = BIOS_ATA_TRANSLATION_AUTO;
|
translation = BIOS_ATA_TRANSLATION_AUTO;
|
||||||
|
|
||||||
for (i = 0; i < MAX_NODES; i++) {
|
for (i = 0; i < MAX_NODES; i++) {
|
||||||
node_mem[i] = 0;
|
numa_info[i].node_mem = 0;
|
||||||
node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS);
|
bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
|
||||||
}
|
}
|
||||||
|
|
||||||
nb_numa_nodes = 0;
|
nb_numa_nodes = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user