11c2fd3e01
uses the QEMU firmware configuration interface to send the NUMA topology to the BIOS, which has to set up the tables. Only one firmware configuration channel is used. Signed-off-by: Andre Przywara <andre.przywara@amd.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7212 c046a42c-6fe2-441c-8c8c-71466251a162
306 lines
9.6 KiB
Diff
306 lines
9.6 KiB
Diff
add SRAT ACPI table support (Andre Przywara)
|
|
|
|
Take NUMA topology info from the QEMU firmware configuration interface
|
|
(number of nodes, node for each (V)CPU and amount of memory) and build
|
|
a SRAT table describing this topology for the guest OS. Handles more than
|
|
4 GB of RAM by including a hole for 32bit PCI memory mapping.
|
|
|
|
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
|
|
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
|
|
|
|
diff --git a/bios/rombios32.c b/bios/rombios32.c
|
|
index 49dfd62..d8f6d4e 100644
|
|
--- a/bios/rombios32.c
|
|
+++ b/bios/rombios32.c
|
|
@@ -450,6 +450,11 @@ int pm_sci_int;
|
|
unsigned long bios_table_cur_addr;
|
|
unsigned long bios_table_end_addr;
|
|
|
|
+static inline uint64_t le64_to_cpu(uint64_t x)
|
|
+{
|
|
+ return x;
|
|
+}
|
|
+
|
|
void wrmsr_smp(uint32_t index, uint64_t val)
|
|
{
|
|
static struct { uint32_t ecx, eax, edx; } *p = (void *)SMP_MSR_ADDR;
|
|
@@ -468,6 +473,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
|
|
#define QEMU_CFG_SIGNATURE 0x00
|
|
#define QEMU_CFG_ID 0x01
|
|
#define QEMU_CFG_UUID 0x02
|
|
+#define QEMU_CFG_NUMA 0x0D
|
|
#define QEMU_CFG_ARCH_LOCAL 0x8000
|
|
#define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0)
|
|
#define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1)
|
|
@@ -529,6 +535,14 @@ static uint16_t smbios_entries(void)
|
|
|
|
return cnt;
|
|
}
|
|
+
|
|
+uint64_t qemu_cfg_get64 (void)
|
|
+{
|
|
+ uint64_t ret;
|
|
+
|
|
+ qemu_cfg_read((uint8_t*)&ret, 8);
|
|
+ return le64_to_cpu(ret);
|
|
+}
|
|
#endif
|
|
|
|
void cpu_probe(void)
|
|
@@ -1281,7 +1295,7 @@ struct rsdt_descriptor_rev1
|
|
{
|
|
ACPI_TABLE_HEADER_DEF /* ACPI common table header */
|
|
#ifdef BX_QEMU
|
|
- uint32_t table_offset_entry [4]; /* Array of pointers to other */
|
|
+ uint32_t table_offset_entry [5]; /* Array of pointers to other */
|
|
#else
|
|
uint32_t table_offset_entry [3]; /* Array of pointers to other */
|
|
#endif
|
|
@@ -1389,7 +1403,7 @@ struct multiple_apic_table
|
|
} __attribute__((__packed__));
|
|
|
|
|
|
-/* Values for Type in APIC_HEADER_DEF */
|
|
+/* Values for Type in APIC sub-headers */
|
|
|
|
#define APIC_PROCESSOR 0
|
|
#define APIC_IO 1
|
|
@@ -1402,18 +1416,18 @@ struct multiple_apic_table
|
|
#define APIC_XRUPT_SOURCE 8
|
|
#define APIC_RESERVED 9 /* 9 and greater are reserved */
|
|
|
|
-/*
|
|
- * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
|
|
- */
|
|
-#define APIC_HEADER_DEF /* Common APIC sub-structure header */\
|
|
+#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\
|
|
uint8_t type; \
|
|
uint8_t length;
|
|
|
|
+/*
|
|
+ * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
|
|
+ */
|
|
/* Sub-structures for MADT */
|
|
|
|
struct madt_processor_apic
|
|
{
|
|
- APIC_HEADER_DEF
|
|
+ ACPI_SUB_HEADER_DEF
|
|
uint8_t processor_id; /* ACPI processor id */
|
|
uint8_t local_apic_id; /* Processor's local APIC id */
|
|
#if 0
|
|
@@ -1424,6 +1438,43 @@ struct madt_processor_apic
|
|
#endif
|
|
} __attribute__((__packed__));
|
|
|
|
+/*
|
|
+ * SRAT (NUMA topology description) table
|
|
+ */
|
|
+
|
|
+#define SRAT_PROCESSOR 0
|
|
+#define SRAT_MEMORY 1
|
|
+
|
|
+struct system_resource_affinity_table
|
|
+{
|
|
+ ACPI_TABLE_HEADER_DEF
|
|
+ uint32_t reserved1;
|
|
+ uint32_t reserved2[2];
|
|
+};
|
|
+
|
|
+struct srat_processor_affinity
|
|
+{
|
|
+ ACPI_SUB_HEADER_DEF
|
|
+ uint8_t proximity_lo;
|
|
+ uint8_t local_apic_id;
|
|
+ uint32_t flags;
|
|
+ uint8_t local_sapic_eid;
|
|
+ uint8_t proximity_hi[3];
|
|
+ uint32_t reserved;
|
|
+};
|
|
+
|
|
+struct srat_memory_affinity
|
|
+{
|
|
+ ACPI_SUB_HEADER_DEF
|
|
+ uint8_t proximity[4];
|
|
+ uint16_t reserved1;
|
|
+ uint32_t base_addr_low,base_addr_high;
|
|
+ uint32_t length_low,length_high;
|
|
+ uint32_t reserved2;
|
|
+ uint32_t flags;
|
|
+ uint32_t reserved3[2];
|
|
+};
|
|
+
|
|
#ifdef BX_QEMU
|
|
/*
|
|
* * ACPI 2.0 Generic Address Space definition.
|
|
@@ -1452,7 +1503,7 @@ struct acpi_20_hpet {
|
|
|
|
struct madt_io_apic
|
|
{
|
|
- APIC_HEADER_DEF
|
|
+ ACPI_SUB_HEADER_DEF
|
|
uint8_t io_apic_id; /* I/O APIC ID */
|
|
uint8_t reserved; /* Reserved - must be zero */
|
|
uint32_t address; /* APIC physical address */
|
|
@@ -1463,7 +1514,7 @@ struct madt_io_apic
|
|
#ifdef BX_QEMU
|
|
struct madt_int_override
|
|
{
|
|
- APIC_HEADER_DEF
|
|
+ ACPI_SUB_HEADER_DEF
|
|
uint8_t bus; /* Identifies ISA Bus */
|
|
uint8_t source; /* Bus-relative interrupt source */
|
|
uint32_t gsi; /* GSI that source will signal */
|
|
@@ -1567,6 +1618,21 @@ int acpi_build_processor_ssdt(uint8_t *ssdt)
|
|
return ssdt_ptr - ssdt;
|
|
}
|
|
|
|
+static void acpi_build_srat_memory(struct srat_memory_affinity *numamem,
|
|
+ uint64_t base, uint64_t len, int node, int enabled)
|
|
+{
|
|
+ numamem->type = SRAT_MEMORY;
|
|
+ numamem->length = sizeof(*numamem);
|
|
+ memset (numamem->proximity, 0 ,4);
|
|
+ numamem->proximity[0] = node;
|
|
+ numamem->flags = cpu_to_le32(!!enabled);
|
|
+ numamem->base_addr_low = base & 0xFFFFFFFF;
|
|
+ numamem->base_addr_high = base >> 32;
|
|
+ numamem->length_low = len & 0xFFFFFFFF;
|
|
+ numamem->length_high = len >> 32;
|
|
+ return;
|
|
+}
|
|
+
|
|
/* base_addr must be a multiple of 4KB */
|
|
void acpi_bios_init(void)
|
|
{
|
|
@@ -1577,12 +1643,15 @@ void acpi_bios_init(void)
|
|
struct multiple_apic_table *madt;
|
|
uint8_t *dsdt, *ssdt;
|
|
#ifdef BX_QEMU
|
|
+ struct system_resource_affinity_table *srat;
|
|
struct acpi_20_hpet *hpet;
|
|
uint32_t hpet_addr;
|
|
#endif
|
|
uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr;
|
|
uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size;
|
|
+ uint32_t srat_addr,srat_size;
|
|
uint16_t i, external_tables;
|
|
+ int nb_numa_nodes;
|
|
|
|
/* reserve memory space for tables */
|
|
#ifdef BX_USE_EBDA_TABLES
|
|
@@ -1624,6 +1693,25 @@ void acpi_bios_init(void)
|
|
ssdt_addr = addr;
|
|
ssdt = (void *)(addr);
|
|
addr += acpi_build_processor_ssdt(ssdt);
|
|
+#ifdef BX_QEMU
|
|
+ qemu_cfg_select(QEMU_CFG_NUMA);
|
|
+ nb_numa_nodes = qemu_cfg_get64();
|
|
+#else
|
|
+ nb_numa_nodes = 0;
|
|
+#endif
|
|
+ if (nb_numa_nodes > 0) {
|
|
+ addr = (addr + 7) & ~7;
|
|
+ srat_addr = addr;
|
|
+ srat_size = sizeof(*srat) +
|
|
+ sizeof(struct srat_processor_affinity) * smp_cpus +
|
|
+ sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
|
|
+ srat = (void *)(addr);
|
|
+ addr += srat_size;
|
|
+ } else {
|
|
+ srat_addr = addr;
|
|
+ srat = (void*)(addr);
|
|
+ srat_size = 0;
|
|
+ }
|
|
|
|
addr = (addr + 7) & ~7;
|
|
madt_addr = addr;
|
|
@@ -1733,6 +1821,69 @@ void acpi_bios_init(void)
|
|
|
|
memset(rsdt, 0, rsdt_size);
|
|
#ifdef BX_QEMU
|
|
+ /* SRAT */
|
|
+ if (nb_numa_nodes > 0) {
|
|
+ struct srat_processor_affinity *core;
|
|
+ struct srat_memory_affinity *numamem;
|
|
+ int slots;
|
|
+ uint64_t mem_len, mem_base, next_base = 0, curnode;
|
|
+
|
|
+ qemu_cfg_select(QEMU_CFG_NUMA);
|
|
+ qemu_cfg_get64();
|
|
+ memset (srat, 0 , srat_size);
|
|
+ srat->reserved1=1;
|
|
+
|
|
+ core = (void*)(srat + 1);
|
|
+ for (i = 0; i < smp_cpus; ++i) {
|
|
+ core->type = SRAT_PROCESSOR;
|
|
+ core->length = sizeof(*core);
|
|
+ core->local_apic_id = i;
|
|
+ curnode = qemu_cfg_get64();
|
|
+ core->proximity_lo = curnode;
|
|
+ memset (core->proximity_hi, 0, 3);
|
|
+ core->local_sapic_eid = 0;
|
|
+ if (i < smp_cpus)
|
|
+ core->flags = cpu_to_le32(1);
|
|
+ else
|
|
+ core->flags = 0;
|
|
+ core++;
|
|
+ }
|
|
+
|
|
+ /* the memory map is a bit tricky, it contains at least one hole
|
|
+ * from 640k-1M and possibly another one from 3.5G-4G.
|
|
+ */
|
|
+ numamem = (void*)core; slots = 0;
|
|
+ acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
|
|
+ next_base = 1024 * 1024; numamem++;slots++;
|
|
+ for (i = 1; i < nb_numa_nodes + 1; ++i) {
|
|
+ mem_base = next_base;
|
|
+ mem_len = qemu_cfg_get64();
|
|
+ if (i == 1) mem_len -= 1024 * 1024;
|
|
+ next_base = mem_base + mem_len;
|
|
+
|
|
+ /* Cut out the PCI hole */
|
|
+ if (mem_base <= ram_size && next_base > ram_size) {
|
|
+ mem_len -= next_base - ram_size;
|
|
+ if (mem_len > 0) {
|
|
+ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
|
|
+ numamem++; slots++;
|
|
+ }
|
|
+ mem_base = 1ULL << 32;
|
|
+ mem_len = next_base - ram_size;
|
|
+ next_base += (1ULL << 32) - ram_size;
|
|
+ }
|
|
+ acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
|
|
+ numamem++; slots++;
|
|
+ }
|
|
+ for (; slots < nb_numa_nodes + 2; slots++) {
|
|
+ acpi_build_srat_memory(numamem, 0, 0, 0, 0);
|
|
+ numamem++;
|
|
+ }
|
|
+
|
|
+ acpi_build_table_header((struct acpi_table_header *)srat,
|
|
+ "SRAT", srat_size, 1);
|
|
+ }
|
|
+
|
|
/* HPET */
|
|
memset(hpet, 0, sizeof(*hpet));
|
|
/* Note timer_block_id value must be kept in sync with value advertised by
|
|
@@ -1761,9 +1912,11 @@ void acpi_bios_init(void)
|
|
rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr);
|
|
#ifdef BX_QEMU
|
|
rsdt->table_offset_entry[3] = cpu_to_le32(hpet_addr);
|
|
+ if (nb_numa_nodes > 0)
|
|
+ rsdt->table_offset_entry[4] = cpu_to_le32(srat_addr);
|
|
#endif
|
|
- acpi_build_table_header((struct acpi_table_header *)rsdt,
|
|
- "RSDT", rsdt_size, 1);
|
|
+ acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
|
|
+ rsdt_size - (nb_numa_nodes > 0? 0: sizeof(uint32_t)), 1);
|
|
|
|
acpi_tables_size = addr - base_addr;
|
|
|
|
--
|
|
1.6.1.3
|
|
|
|
|