added -numa cmdline parameter parser (Andre Przywara)
Adds a -numa command line parameter and sets a QEMU global array with the memory sizes. The CPU-to-node assignment is written into the CPUState. If no specific values for memory and CPUs are given, all resources will be split equally across all nodes. This code currently supports only up to 64 virtual CPUs. Signed-off-by: Andre Przywara <andre.przywara@amd.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7210 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
parent
754d00ae3a
commit
268a362c63
@ -205,6 +205,7 @@ typedef struct CPUWatchpoint {
|
|||||||
\
|
\
|
||||||
CPUState *next_cpu; /* next CPU sharing TB cache */ \
|
CPUState *next_cpu; /* next CPU sharing TB cache */ \
|
||||||
int cpu_index; /* CPU index (informative) */ \
|
int cpu_index; /* CPU index (informative) */ \
|
||||||
|
int numa_node; /* NUMA node this cpu is belonging to */ \
|
||||||
int running; /* Nonzero if cpu is currently running(usermode). */ \
|
int running; /* Nonzero if cpu is currently running(usermode). */ \
|
||||||
/* user data */ \
|
/* user data */ \
|
||||||
void *opaque; \
|
void *opaque; \
|
||||||
|
1
exec.c
1
exec.c
@ -554,6 +554,7 @@ void cpu_exec_init(CPUState *env)
|
|||||||
cpu_index++;
|
cpu_index++;
|
||||||
}
|
}
|
||||||
env->cpu_index = cpu_index;
|
env->cpu_index = cpu_index;
|
||||||
|
env->numa_node = 0;
|
||||||
TAILQ_INIT(&env->breakpoints);
|
TAILQ_INIT(&env->breakpoints);
|
||||||
TAILQ_INIT(&env->watchpoints);
|
TAILQ_INIT(&env->watchpoints);
|
||||||
*penv = env;
|
*penv = env;
|
||||||
|
@ -47,6 +47,14 @@ CPUs are supported. On Sparc32 target, Linux limits the number of usable CPUs
|
|||||||
to 4.
|
to 4.
|
||||||
ETEXI
|
ETEXI
|
||||||
|
|
||||||
|
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
|
||||||
|
"-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n")
|
||||||
|
STEXI
|
||||||
|
@item -numa @var{opts}
|
||||||
|
Simulate a multi node NUMA system. If mem and cpus are omitted, resources
|
||||||
|
are split equally.
|
||||||
|
ETEXI
|
||||||
|
|
||||||
DEF("fda", HAS_ARG, QEMU_OPTION_fda,
|
DEF("fda", HAS_ARG, QEMU_OPTION_fda,
|
||||||
"-fda/-fdb file use 'file' as floppy disk 0/1 image\n")
|
"-fda/-fdb file use 'file' as floppy disk 0/1 image\n")
|
||||||
DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "")
|
DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "")
|
||||||
|
6
sysemu.h
6
sysemu.h
@ -108,6 +108,10 @@ extern int old_param;
|
|||||||
extern int kqemu_allowed;
|
extern int kqemu_allowed;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define MAX_NODES 64
|
||||||
|
extern int nb_numa_nodes;
|
||||||
|
extern uint64_t node_mem[MAX_NODES];
|
||||||
|
|
||||||
#define MAX_OPTION_ROMS 16
|
#define MAX_OPTION_ROMS 16
|
||||||
extern const char *option_rom[MAX_OPTION_ROMS];
|
extern const char *option_rom[MAX_OPTION_ROMS];
|
||||||
extern int nb_option_roms;
|
extern int nb_option_roms;
|
||||||
@ -248,7 +252,7 @@ void do_usb_add(Monitor *mon, const char *devname);
|
|||||||
void do_usb_del(Monitor *mon, const char *devname);
|
void do_usb_del(Monitor *mon, const char *devname);
|
||||||
void usb_info(Monitor *mon);
|
void usb_info(Monitor *mon);
|
||||||
|
|
||||||
const char *get_opt_name(char *buf, int buf_size, const char *p);
|
const char *get_opt_name(char *buf, int buf_size, const char *p, char delim);
|
||||||
const char *get_opt_value(char *buf, int buf_size, const char *p);
|
const char *get_opt_value(char *buf, int buf_size, const char *p);
|
||||||
int get_param_value(char *buf, int buf_size,
|
int get_param_value(char *buf, int buf_size,
|
||||||
const char *tag, const char *str);
|
const char *tag, const char *str);
|
||||||
|
133
vl.c
133
vl.c
@ -265,6 +265,10 @@ const char *prom_envs[MAX_PROM_ENVS];
|
|||||||
int nb_drives_opt;
|
int nb_drives_opt;
|
||||||
struct drive_opt drives_opt[MAX_DRIVES];
|
struct drive_opt drives_opt[MAX_DRIVES];
|
||||||
|
|
||||||
|
int nb_numa_nodes;
|
||||||
|
uint64_t node_mem[MAX_NODES];
|
||||||
|
uint64_t node_cpumask[MAX_NODES];
|
||||||
|
|
||||||
static CPUState *cur_cpu;
|
static CPUState *cur_cpu;
|
||||||
static CPUState *next_cpu;
|
static CPUState *next_cpu;
|
||||||
static int event_pending = 1;
|
static int event_pending = 1;
|
||||||
@ -1865,12 +1869,12 @@ static int socket_init(void)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const char *get_opt_name(char *buf, int buf_size, const char *p)
|
const char *get_opt_name(char *buf, int buf_size, const char *p, char delim)
|
||||||
{
|
{
|
||||||
char *q;
|
char *q;
|
||||||
|
|
||||||
q = buf;
|
q = buf;
|
||||||
while (*p != '\0' && *p != '=') {
|
while (*p != '\0' && *p != delim) {
|
||||||
if (q && (q - buf) < buf_size - 1)
|
if (q && (q - buf) < buf_size - 1)
|
||||||
*q++ = *p;
|
*q++ = *p;
|
||||||
p++;
|
p++;
|
||||||
@ -1910,7 +1914,7 @@ int get_param_value(char *buf, int buf_size,
|
|||||||
|
|
||||||
p = str;
|
p = str;
|
||||||
for(;;) {
|
for(;;) {
|
||||||
p = get_opt_name(option, sizeof(option), p);
|
p = get_opt_name(option, sizeof(option), p, '=');
|
||||||
if (*p != '=')
|
if (*p != '=')
|
||||||
break;
|
break;
|
||||||
p++;
|
p++;
|
||||||
@ -1935,7 +1939,7 @@ int check_params(char *buf, int buf_size,
|
|||||||
|
|
||||||
p = str;
|
p = str;
|
||||||
while (*p != '\0') {
|
while (*p != '\0') {
|
||||||
p = get_opt_name(buf, buf_size, p);
|
p = get_opt_name(buf, buf_size, p, '=');
|
||||||
if (*p != '=')
|
if (*p != '=')
|
||||||
return -1;
|
return -1;
|
||||||
p++;
|
p++;
|
||||||
@ -2628,6 +2632,62 @@ int drive_init(struct drive_opt *arg, int snapshot, void *opaque)
|
|||||||
return drives_table_idx;
|
return drives_table_idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void numa_add(const char *optarg)
|
||||||
|
{
|
||||||
|
char option[128];
|
||||||
|
char *endptr;
|
||||||
|
unsigned long long value, endvalue;
|
||||||
|
int nodenr;
|
||||||
|
|
||||||
|
optarg = get_opt_name(option, 128, optarg, ',') + 1;
|
||||||
|
if (!strcmp(option, "node")) {
|
||||||
|
if (get_param_value(option, 128, "nodeid", optarg) == 0) {
|
||||||
|
nodenr = nb_numa_nodes;
|
||||||
|
} else {
|
||||||
|
nodenr = strtoull(option, NULL, 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (get_param_value(option, 128, "mem", optarg) == 0) {
|
||||||
|
node_mem[nodenr] = 0;
|
||||||
|
} else {
|
||||||
|
value = strtoull(option, &endptr, 0);
|
||||||
|
switch (*endptr) {
|
||||||
|
case 0: case 'M': case 'm':
|
||||||
|
value <<= 20;
|
||||||
|
break;
|
||||||
|
case 'G': case 'g':
|
||||||
|
value <<= 30;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
node_mem[nodenr] = value;
|
||||||
|
}
|
||||||
|
if (get_param_value(option, 128, "cpus", optarg) == 0) {
|
||||||
|
node_cpumask[nodenr] = 0;
|
||||||
|
} else {
|
||||||
|
value = strtoull(option, &endptr, 10);
|
||||||
|
if (value >= 64) {
|
||||||
|
value = 63;
|
||||||
|
fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
|
||||||
|
} else {
|
||||||
|
if (*endptr == '-') {
|
||||||
|
endvalue = strtoull(endptr+1, &endptr, 10);
|
||||||
|
if (endvalue >= 63) {
|
||||||
|
endvalue = 62;
|
||||||
|
fprintf(stderr,
|
||||||
|
"only 63 CPUs in NUMA mode supported.\n");
|
||||||
|
}
|
||||||
|
value = (1 << (endvalue + 1)) - (1 << value);
|
||||||
|
} else {
|
||||||
|
value = 1 << value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
node_cpumask[nodenr] = value;
|
||||||
|
}
|
||||||
|
nb_numa_nodes++;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/***********************************************************/
|
/***********************************************************/
|
||||||
/* USB devices */
|
/* USB devices */
|
||||||
|
|
||||||
@ -4290,6 +4350,7 @@ int main(int argc, char **argv, char **envp)
|
|||||||
const char *chroot_dir = NULL;
|
const char *chroot_dir = NULL;
|
||||||
const char *run_as = NULL;
|
const char *run_as = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
CPUState *env;
|
||||||
|
|
||||||
qemu_cache_utils_init(envp);
|
qemu_cache_utils_init(envp);
|
||||||
|
|
||||||
@ -4353,12 +4414,18 @@ int main(int argc, char **argv, char **envp)
|
|||||||
virtio_consoles[i] = NULL;
|
virtio_consoles[i] = NULL;
|
||||||
virtio_console_index = 0;
|
virtio_console_index = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_NODES; i++) {
|
||||||
|
node_mem[i] = 0;
|
||||||
|
node_cpumask[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
usb_devices_index = 0;
|
usb_devices_index = 0;
|
||||||
|
|
||||||
nb_net_clients = 0;
|
nb_net_clients = 0;
|
||||||
nb_bt_opts = 0;
|
nb_bt_opts = 0;
|
||||||
nb_drives = 0;
|
nb_drives = 0;
|
||||||
nb_drives_opt = 0;
|
nb_drives_opt = 0;
|
||||||
|
nb_numa_nodes = 0;
|
||||||
hda_index = -1;
|
hda_index = -1;
|
||||||
|
|
||||||
nb_nics = 0;
|
nb_nics = 0;
|
||||||
@ -4508,6 +4575,13 @@ int main(int argc, char **argv, char **envp)
|
|||||||
",trans=none" : "");
|
",trans=none" : "");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case QEMU_OPTION_numa:
|
||||||
|
if (nb_numa_nodes >= MAX_NODES) {
|
||||||
|
fprintf(stderr, "qemu: too many NUMA nodes\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
numa_add(optarg);
|
||||||
|
break;
|
||||||
case QEMU_OPTION_nographic:
|
case QEMU_OPTION_nographic:
|
||||||
nographic = 1;
|
nographic = 1;
|
||||||
break;
|
break;
|
||||||
@ -5211,6 +5285,48 @@ int main(int argc, char **argv, char **envp)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nb_numa_nodes > 0) {
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (nb_numa_nodes > smp_cpus) {
|
||||||
|
nb_numa_nodes = smp_cpus;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If no memory size is given for any node, assume the default case
|
||||||
|
* and distribute the available memory equally across all nodes
|
||||||
|
*/
|
||||||
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
|
if (node_mem[i] != 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (i == nb_numa_nodes) {
|
||||||
|
uint64_t usedmem = 0;
|
||||||
|
|
||||||
|
/* On Linux, each node's border has to be 8MB aligned,
|
||||||
|
* the final node gets the rest.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < nb_numa_nodes - 1; i++) {
|
||||||
|
node_mem[i] = (ram_size / nb_numa_nodes) & ~((1 << 23UL) - 1);
|
||||||
|
usedmem += node_mem[i];
|
||||||
|
}
|
||||||
|
node_mem[i] = ram_size - usedmem;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
|
if (node_cpumask[i] != 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* assigning the VCPUs round-robin is easier to implement, guest OSes
|
||||||
|
* must cope with this anyway, because there are BIOSes out there in
|
||||||
|
* real machines which also use this scheme.
|
||||||
|
*/
|
||||||
|
if (i == nb_numa_nodes) {
|
||||||
|
for (i = 0; i < smp_cpus; i++) {
|
||||||
|
node_cpumask[i % nb_numa_nodes] |= 1 << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (kvm_enabled()) {
|
if (kvm_enabled()) {
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
@ -5274,6 +5390,15 @@ int main(int argc, char **argv, char **envp)
|
|||||||
machine->init(ram_size, vga_ram_size, boot_devices,
|
machine->init(ram_size, vga_ram_size, boot_devices,
|
||||||
kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
|
kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
|
||||||
|
|
||||||
|
|
||||||
|
for (env = first_cpu; env != NULL; env = env->next_cpu) {
|
||||||
|
for (i = 0; i < nb_numa_nodes; i++) {
|
||||||
|
if (node_cpumask[i] & (1 << env->cpu_index)) {
|
||||||
|
env->numa_node = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
current_machine = machine;
|
current_machine = machine;
|
||||||
|
|
||||||
/* Set KVM's vcpu state to qemu's initial CPUState. */
|
/* Set KVM's vcpu state to qemu's initial CPUState. */
|
||||||
|
Loading…
Reference in New Issue
Block a user