qemu/include/sysemu/numa.h

117 lines
3.3 KiB
C
Raw Normal View History

#ifndef SYSEMU_NUMA_H
#define SYSEMU_NUMA_H
#include "qemu/bitmap.h"
#include "qapi/qapi-types-machine.h"
#include "exec/cpu-common.h"
struct CPUArchId;
#define MAX_NODES 128
#define NUMA_NODE_UNASSIGNED MAX_NODES
#define NUMA_DISTANCE_MIN 10
#define NUMA_DISTANCE_DEFAULT 20
#define NUMA_DISTANCE_MAX 254
#define NUMA_DISTANCE_UNREACHABLE 255
/* the value of AcpiHmatLBInfo flags */
enum {
HMAT_LB_MEM_MEMORY = 0,
HMAT_LB_MEM_CACHE_1ST_LEVEL = 1,
HMAT_LB_MEM_CACHE_2ND_LEVEL = 2,
HMAT_LB_MEM_CACHE_3RD_LEVEL = 3,
HMAT_LB_LEVELS /* must be the last entry */
};
/* the value of AcpiHmatLBInfo data type */
enum {
HMAT_LB_DATA_ACCESS_LATENCY = 0,
HMAT_LB_DATA_READ_LATENCY = 1,
HMAT_LB_DATA_WRITE_LATENCY = 2,
HMAT_LB_DATA_ACCESS_BANDWIDTH = 3,
HMAT_LB_DATA_READ_BANDWIDTH = 4,
HMAT_LB_DATA_WRITE_BANDWIDTH = 5,
HMAT_LB_TYPES /* must be the last entry */
};
#define UINT16_BITS 16
struct NodeInfo {
uint64_t node_mem;
struct HostMemoryBackend *node_memdev;
bool present;
bool has_cpu;
uint8_t lb_info_provided;
uint16_t initiator;
uint8_t distance[MAX_NODES];
numa: equally distribute memory on nodes When there are more nodes than available memory to put the minimum allowed memory by node, all the memory is put on the last node. This is because we put (ram_size / nb_numa_nodes) & ~((1 << mc->numa_mem_align_shift) - 1); on each node, and in this case the value is 0. This is particularly true with pseries, as the memory must be aligned to 256MB. To avoid this problem, this patch uses an error diffusion algorithm [1] to distribute equally the memory on nodes. We introduce numa_auto_assign_ram() function in MachineClass to keep compatibility between machine type versions. The legacy function is used with pseries-2.9, pc-q35-2.9 and pc-i440fx-2.9 (and previous), the new one with all others. Example: qemu-system-ppc64 -S -nographic -nodefaults -monitor stdio -m 1G -smp 8 \ -numa node -numa node -numa node \ -numa node -numa node -numa node Before: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 0 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 0 MB node 4 cpus: 4 node 4 size: 0 MB node 5 cpus: 5 node 5 size: 1024 MB After: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 256 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 256 MB node 4 cpus: 4 node 4 size: 256 MB node 5 cpus: 5 node 5 size: 256 MB [1] https://en.wikipedia.org/wiki/Error_diffusion Signed-off-by: Laurent Vivier <lvivier@redhat.com> Message-Id: <20170502162955.1610-2-lvivier@redhat.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> [ehabkost: s/ram_size/size/ at numa_default_auto_assign_ram()] Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-02 19:29:55 +03:00
};
struct NumaNodeMem {
uint64_t node_mem;
uint64_t node_plugged_mem;
};
struct HMAT_LB_Data {
uint8_t initiator;
uint8_t target;
uint64_t data;
};
typedef struct HMAT_LB_Data HMAT_LB_Data;
struct HMAT_LB_Info {
/* Indicates it's memory or the specified level memory side cache. */
uint8_t hierarchy;
/* Present the type of data, access/read/write latency or bandwidth. */
uint8_t data_type;
/* The range bitmap of bandwidth for calculating common base */
uint64_t range_bitmap;
/* The common base unit for latencies or bandwidths */
uint64_t base;
/* Array to store the latencies or bandwidths */
GArray *list;
};
typedef struct HMAT_LB_Info HMAT_LB_Info;
struct NumaState {
/* Number of NUMA nodes */
int num_nodes;
/* Allow setting NUMA distance for different NUMA nodes */
bool have_numa_distance;
/* Detect if HMAT support is enabled. */
bool hmat_enabled;
/* NUMA nodes information */
NodeInfo nodes[MAX_NODES];
/* NUMA nodes HMAT Locality Latency and Bandwidth Information */
HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
/* Memory Side Cache Information Structure */
NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS];
};
typedef struct NumaState NumaState;
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
void parse_numa_opts(MachineState *ms);
void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
Error **errp);
void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
Error **errp);
void numa_complete_configuration(MachineState *ms);
void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
extern QemuOptsList qemu_numa_opts;
numa: equally distribute memory on nodes When there are more nodes than available memory to put the minimum allowed memory by node, all the memory is put on the last node. This is because we put (ram_size / nb_numa_nodes) & ~((1 << mc->numa_mem_align_shift) - 1); on each node, and in this case the value is 0. This is particularly true with pseries, as the memory must be aligned to 256MB. To avoid this problem, this patch uses an error diffusion algorithm [1] to distribute equally the memory on nodes. We introduce numa_auto_assign_ram() function in MachineClass to keep compatibility between machine type versions. The legacy function is used with pseries-2.9, pc-q35-2.9 and pc-i440fx-2.9 (and previous), the new one with all others. Example: qemu-system-ppc64 -S -nographic -nodefaults -monitor stdio -m 1G -smp 8 \ -numa node -numa node -numa node \ -numa node -numa node -numa node Before: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 0 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 0 MB node 4 cpus: 4 node 4 size: 0 MB node 5 cpus: 5 node 5 size: 1024 MB After: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 256 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 256 MB node 4 cpus: 4 node 4 size: 256 MB node 5 cpus: 5 node 5 size: 256 MB [1] https://en.wikipedia.org/wiki/Error_diffusion Signed-off-by: Laurent Vivier <lvivier@redhat.com> Message-Id: <20170502162955.1610-2-lvivier@redhat.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> [ehabkost: s/ram_size/size/ at numa_default_auto_assign_ram()] Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-02 19:29:55 +03:00
void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
void numa_cpu_pre_plug(const struct CPUArchId *slot, DeviceState *dev,
Error **errp);
#endif