From a18495159a35e9c5973d9aa0f612a97318bf684d Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 20 Mar 2018 08:08:15 +0800 Subject: [PATCH 1/7] i386: add KnightsMill cpu model A new cpu model called "KnightsMill" is added to model Knights Mill processors. Compared to "Skylake-Server" cpu model, the following features are added: avx512_4vnniw avx512_4fmaps avx512pf avx512er avx512_vpopcntdq and the following features are removed: pcid invpcid clflushopt avx512dq avx512bw clwb smap rtm mpx xsavec xgetbv1 hle Signed-off-by: Boqun Feng Message-Id: <20180320000821.8337-1-boqun.feng@intel.com> Signed-off-by: Eduardo Habkost --- target/i386/cpu.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index b0a1c629a3..52fd35b6a1 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1839,6 +1839,48 @@ static X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000008, .model_id = "Intel Xeon Processor (Skylake, IBRS)", }, + { + .name = "KnightsMill", + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, + .model = 133, + .stepping = 0, + .features[FEAT_1_EDX] = + CPUID_VME | CPUID_SS | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | + CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | + CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | + CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | + CPUID_PSE | CPUID_DE | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | + CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | + CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | + CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | + CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | + CPUID_EXT_F16C | CPUID_EXT_RDRAND, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | + CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512PF | + CPUID_7_0_EBX_AVX512ER, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VPOPCNTDQ, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_AVX512_4VNNIW | CPUID_7_0_EDX_AVX512_4FMAPS, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Phi Processor (Knights Mill)", + }, { .name = "Opteron_G1", .level = 5, From 0da0fb062841d0dcd8ba47e4a989d2e952cdf0ff Mon Sep 17 00:00:00 2001 From: Jingqi Liu Date: Fri, 4 May 2018 11:57:33 +0800 Subject: [PATCH 2/7] x86/cpu: Enable CLDEMOTE(Demote Cache Line) cpu feature The CLDEMOTE instruction hints to hardware that the cache line that contains the linear address should be moved("demoted") from the cache(s) closest to the processor core to a level more distant from the processor core. This may accelerate subsequent accesses to the line by other cores in the same coherence domain, especially if the line was written by the core that demotes the line. Intel Snow Ridge has added new cpu feature, CLDEMOTE. The new cpu feature needs to be exposed to guest VM. The bit definition: CPUID.(EAX=7,ECX=0):ECX[bit 25] CLDEMOTE The release document ref below link: https://software.intel.com/sites/default/files/managed/c5/15/\ architecture-instruction-set-extensions-programming-reference.pdf Signed-off-by: Jingqi Liu Message-Id: <1525406253-54846-1-git-send-email-jingqi.liu@intel.com> Reviewed-by: Eduardo Habkost Signed-off-by: Eduardo Habkost --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 52fd35b6a1..4b39ab5dd4 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -494,7 +494,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx512bitalg", NULL, "avx512-vpopcntdq", NULL, "la57", NULL, NULL, NULL, NULL, NULL, "rdpid", NULL, - NULL, NULL, NULL, NULL, + NULL, "cldemote", NULL, NULL, NULL, NULL, NULL, NULL, }, .cpuid_eax = 7, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index b58b779bff..8fbe1537c1 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -680,6 +680,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_ECX_AVX512_VPOPCNTDQ (1U << 14) /* POPCNT for vectors of DW/QW */ #define CPUID_7_0_ECX_LA57 (1U << 16) #define CPUID_7_0_ECX_RDPID (1U << 22) +#define CPUID_7_0_ECX_CLDEMOTE (1U << 25) /* CLDEMOTE Instruction */ #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ From 7e3482f824809e1f6ffeb5bb8103ba27a7d1a52a Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Thu, 10 May 2018 15:41:41 -0500 Subject: [PATCH 3/7] i386: Helpers to encode cache information consistently Instead of having a collection of macros that need to be used in complex expressions to build CPUID data, define a CPUCacheInfo struct that can hold information about a given cache. Helper functions will take a CPUCacheInfo struct as input to encode CPUID leaves for a cache. This will help us ensure consistency between cache information CPUID leaves, and make the existing inconsistencies in CPUID info more visible. Signed-off-by: Eduardo Habkost Signed-off-by: Babu Moger Tested-by: Geoffrey McRae Message-Id: <20180510204148.11687-2-babu.moger@amd.com> Signed-off-by: Eduardo Habkost --- target/i386/cpu.c | 495 ++++++++++++++++++++++++++++++++++------------ target/i386/cpu.h | 53 +++++ 2 files changed, 424 insertions(+), 124 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 4b39ab5dd4..28bb93990e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -56,33 +56,240 @@ #include "disas/capstone.h" +/* Helpers for building CPUID[2] descriptors: */ -/* Cache topology CPUID constants: */ +struct CPUID2CacheDescriptorInfo { + enum CacheType type; + int level; + int size; + int line_size; + int associativity; +}; -/* CPUID Leaf 2 Descriptors */ +#define KiB 1024 +#define MiB (1024 * 1024) -#define CPUID_2_L1D_32KB_8WAY_64B 0x2c -#define CPUID_2_L1I_32KB_8WAY_64B 0x30 -#define CPUID_2_L2_2MB_8WAY_64B 0x7d -#define CPUID_2_L3_16MB_16WAY_64B 0x4d +/* + * Known CPUID 2 cache descriptors. + * From Intel SDM Volume 2A, CPUID instruction + */ +struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { + [0x06] = { .level = 1, .type = ICACHE, .size = 8 * KiB, + .associativity = 4, .line_size = 32, }, + [0x08] = { .level = 1, .type = ICACHE, .size = 16 * KiB, + .associativity = 4, .line_size = 32, }, + [0x09] = { .level = 1, .type = ICACHE, .size = 32 * KiB, + .associativity = 4, .line_size = 64, }, + [0x0A] = { .level = 1, .type = DCACHE, .size = 8 * KiB, + .associativity = 2, .line_size = 32, }, + [0x0C] = { .level = 1, .type = DCACHE, .size = 16 * KiB, + .associativity = 4, .line_size = 32, }, + [0x0D] = { .level = 1, .type = DCACHE, .size = 16 * KiB, + .associativity = 4, .line_size = 64, }, + [0x0E] = { .level = 1, .type = DCACHE, .size = 24 * KiB, + .associativity = 6, .line_size = 64, }, + [0x1D] = { .level = 2, .type = UNIFIED_CACHE, .size = 128 * KiB, + .associativity = 2, .line_size = 64, }, + [0x21] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, + .associativity = 8, .line_size = 64, }, + /* lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x22, 0x23 are not included + */ + [0x24] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, + .associativity = 16, .line_size = 64, }, + /* lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x25, 0x20 are not included + */ + [0x2C] = { .level = 1, .type = DCACHE, .size = 32 * KiB, + .associativity = 8, .line_size = 64, }, + [0x30] = { .level = 1, .type = ICACHE, .size = 32 * KiB, + .associativity = 8, .line_size = 64, }, + [0x41] = { .level = 2, .type = UNIFIED_CACHE, .size = 128 * KiB, + .associativity = 4, .line_size = 32, }, + [0x42] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, + .associativity = 4, .line_size = 32, }, + [0x43] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB, + .associativity = 4, .line_size = 32, }, + [0x44] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, + .associativity = 4, .line_size = 32, }, + [0x45] = { .level = 2, .type = UNIFIED_CACHE, .size = 2 * MiB, + .associativity = 4, .line_size = 32, }, + [0x46] = { .level = 3, .type = UNIFIED_CACHE, .size = 4 * MiB, + .associativity = 4, .line_size = 64, }, + [0x47] = { .level = 3, .type = UNIFIED_CACHE, .size = 8 * MiB, + .associativity = 8, .line_size = 64, }, + [0x48] = { .level = 2, .type = UNIFIED_CACHE, .size = 3 * MiB, + .associativity = 12, .line_size = 64, }, + /* Descriptor 0x49 depends on CPU family/model, so it is not included */ + [0x4A] = { .level = 3, .type = UNIFIED_CACHE, .size = 6 * MiB, + .associativity = 12, .line_size = 64, }, + [0x4B] = { .level = 3, .type = UNIFIED_CACHE, .size = 8 * MiB, + .associativity = 16, .line_size = 64, }, + [0x4C] = { .level = 3, .type = UNIFIED_CACHE, .size = 12 * MiB, + .associativity = 12, .line_size = 64, }, + [0x4D] = { .level = 3, .type = UNIFIED_CACHE, .size = 16 * MiB, + .associativity = 16, .line_size = 64, }, + [0x4E] = { .level = 2, .type = UNIFIED_CACHE, .size = 6 * MiB, + .associativity = 24, .line_size = 64, }, + [0x60] = { .level = 1, .type = DCACHE, .size = 16 * KiB, + .associativity = 8, .line_size = 64, }, + [0x66] = { .level = 1, .type = DCACHE, .size = 8 * KiB, + .associativity = 4, .line_size = 64, }, + [0x67] = { .level = 1, .type = DCACHE, .size = 16 * KiB, + .associativity = 4, .line_size = 64, }, + [0x68] = { .level = 1, .type = DCACHE, .size = 32 * KiB, + .associativity = 4, .line_size = 64, }, + [0x78] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, + .associativity = 4, .line_size = 64, }, + /* lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included. + */ + [0x7D] = { .level = 2, .type = UNIFIED_CACHE, .size = 2 * MiB, + .associativity = 8, .line_size = 64, }, + [0x7F] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB, + .associativity = 2, .line_size = 64, }, + [0x80] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB, + .associativity = 8, .line_size = 64, }, + [0x82] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, + .associativity = 8, .line_size = 32, }, + [0x83] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB, + .associativity = 8, .line_size = 32, }, + [0x84] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, + .associativity = 8, .line_size = 32, }, + [0x85] = { .level = 2, .type = UNIFIED_CACHE, .size = 2 * MiB, + .associativity = 8, .line_size = 32, }, + [0x86] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB, + .associativity = 4, .line_size = 64, }, + [0x87] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, + .associativity = 8, .line_size = 64, }, + [0xD0] = { .level = 3, .type = UNIFIED_CACHE, .size = 512 * KiB, + .associativity = 4, .line_size = 64, }, + [0xD1] = { .level = 3, .type = UNIFIED_CACHE, .size = 1 * MiB, + .associativity = 4, .line_size = 64, }, + [0xD2] = { .level = 3, .type = UNIFIED_CACHE, .size = 2 * MiB, + .associativity = 4, .line_size = 64, }, + [0xD6] = { .level = 3, .type = UNIFIED_CACHE, .size = 1 * MiB, + .associativity = 8, .line_size = 64, }, + [0xD7] = { .level = 3, .type = UNIFIED_CACHE, .size = 2 * MiB, + .associativity = 8, .line_size = 64, }, + [0xD8] = { .level = 3, .type = UNIFIED_CACHE, .size = 4 * MiB, + .associativity = 8, .line_size = 64, }, + [0xDC] = { .level = 3, .type = UNIFIED_CACHE, .size = 1.5 * MiB, + .associativity = 12, .line_size = 64, }, + [0xDD] = { .level = 3, .type = UNIFIED_CACHE, .size = 3 * MiB, + .associativity = 12, .line_size = 64, }, + [0xDE] = { .level = 3, .type = UNIFIED_CACHE, .size = 6 * MiB, + .associativity = 12, .line_size = 64, }, + [0xE2] = { .level = 3, .type = UNIFIED_CACHE, .size = 2 * MiB, + .associativity = 16, .line_size = 64, }, + [0xE3] = { .level = 3, .type = UNIFIED_CACHE, .size = 4 * MiB, + .associativity = 16, .line_size = 64, }, + [0xE4] = { .level = 3, .type = UNIFIED_CACHE, .size = 8 * MiB, + .associativity = 16, .line_size = 64, }, + [0xEA] = { .level = 3, .type = UNIFIED_CACHE, .size = 12 * MiB, + .associativity = 24, .line_size = 64, }, + [0xEB] = { .level = 3, .type = UNIFIED_CACHE, .size = 18 * MiB, + .associativity = 24, .line_size = 64, }, + [0xEC] = { .level = 3, .type = UNIFIED_CACHE, .size = 24 * MiB, + .associativity = 24, .line_size = 64, }, +}; +/* + * "CPUID leaf 2 does not report cache descriptor information, + * use CPUID leaf 4 to query cache parameters" + */ +#define CACHE_DESCRIPTOR_UNAVAILABLE 0xFF + +/* + * Return a CPUID 2 cache descriptor for a given cache. + * If no known descriptor is found, return CACHE_DESCRIPTOR_UNAVAILABLE + */ +static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) +{ + int i; + + assert(cache->size > 0); + assert(cache->level > 0); + assert(cache->line_size > 0); + assert(cache->associativity > 0); + for (i = 0; i < ARRAY_SIZE(cpuid2_cache_descriptors); i++) { + struct CPUID2CacheDescriptorInfo *d = &cpuid2_cache_descriptors[i]; + if (d->level == cache->level && d->type == cache->type && + d->size == cache->size && d->line_size == cache->line_size && + d->associativity == cache->associativity) { + return i; + } + } + + return CACHE_DESCRIPTOR_UNAVAILABLE; +} /* CPUID Leaf 4 constants: */ /* EAX: */ -#define CPUID_4_TYPE_DCACHE 1 -#define CPUID_4_TYPE_ICACHE 2 -#define CPUID_4_TYPE_UNIFIED 3 +#define CACHE_TYPE_D 1 +#define CACHE_TYPE_I 2 +#define CACHE_TYPE_UNIFIED 3 -#define CPUID_4_LEVEL(l) ((l) << 5) +#define CACHE_LEVEL(l) (l << 5) -#define CPUID_4_SELF_INIT_LEVEL (1 << 8) -#define CPUID_4_FULLY_ASSOC (1 << 9) +#define CACHE_SELF_INIT_LEVEL (1 << 8) /* EDX: */ -#define CPUID_4_NO_INVD_SHARING (1 << 0) -#define CPUID_4_INCLUSIVE (1 << 1) -#define CPUID_4_COMPLEX_IDX (1 << 2) +#define CACHE_NO_INVD_SHARING (1 << 0) +#define CACHE_INCLUSIVE (1 << 1) +#define CACHE_COMPLEX_IDX (1 << 2) + +/* Encode CacheType for CPUID[4].EAX */ +#define CACHE_TYPE(t) (((t) == DCACHE) ? CACHE_TYPE_D : \ + ((t) == ICACHE) ? CACHE_TYPE_I : \ + ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \ + 0 /* Invalid value */) + + +/* Encode cache info for CPUID[4] */ +static void encode_cache_cpuid4(CPUCacheInfo *cache, + int num_apic_ids, int num_cores, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + assert(cache->size == cache->line_size * cache->associativity * + cache->partitions * cache->sets); + + assert(num_apic_ids > 0); + *eax = CACHE_TYPE(cache->type) | + CACHE_LEVEL(cache->level) | + (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) | + ((num_cores - 1) << 26) | + ((num_apic_ids - 1) << 14); + + assert(cache->line_size > 0); + assert(cache->partitions > 0); + assert(cache->associativity > 0); + /* We don't implement fully-associative caches */ + assert(cache->associativity < cache->sets); + *ebx = (cache->line_size - 1) | + ((cache->partitions - 1) << 12) | + ((cache->associativity - 1) << 22); + + assert(cache->sets > 0); + *ecx = cache->sets - 1; + + *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) | + (cache->inclusive ? CACHE_INCLUSIVE : 0) | + (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0); +} + +/* Encode cache info for CPUID[0x80000005].ECX or CPUID[0x80000005].EDX */ +static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache) +{ + assert(cache->size % 1024 == 0); + assert(cache->lines_per_tag > 0); + assert(cache->associativity > 0); + assert(cache->line_size > 0); + return ((cache->size / 1024) << 24) | (cache->associativity << 16) | + (cache->lines_per_tag << 8) | (cache->line_size); +} #define ASSOC_FULL 0xFF @@ -100,57 +307,140 @@ a == ASSOC_FULL ? 0xF : \ 0 /* invalid value */) +/* + * Encode cache info for CPUID[0x80000006].ECX and CPUID[0x80000006].EDX + * @l3 can be NULL. + */ +static void encode_cache_cpuid80000006(CPUCacheInfo *l2, + CPUCacheInfo *l3, + uint32_t *ecx, uint32_t *edx) +{ + assert(l2->size % 1024 == 0); + assert(l2->associativity > 0); + assert(l2->lines_per_tag > 0); + assert(l2->line_size > 0); + *ecx = ((l2->size / 1024) << 16) | + (AMD_ENC_ASSOC(l2->associativity) << 12) | + (l2->lines_per_tag << 8) | (l2->line_size); + + if (l3) { + assert(l3->size % (512 * 1024) == 0); + assert(l3->associativity > 0); + assert(l3->lines_per_tag > 0); + assert(l3->line_size > 0); + *edx = ((l3->size / (512 * 1024)) << 18) | + (AMD_ENC_ASSOC(l3->associativity) << 12) | + (l3->lines_per_tag << 8) | (l3->line_size); + } else { + *edx = 0; + } +} /* Definitions of the hardcoded cache entries we expose: */ /* L1 data cache: */ -#define L1D_LINE_SIZE 64 -#define L1D_ASSOCIATIVITY 8 -#define L1D_SETS 64 -#define L1D_PARTITIONS 1 -/* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */ -#define L1D_DESCRIPTOR CPUID_2_L1D_32KB_8WAY_64B +static CPUCacheInfo l1d_cache = { + .type = DCACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, +}; + /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -#define L1D_LINES_PER_TAG 1 -#define L1D_SIZE_KB_AMD 64 -#define L1D_ASSOCIATIVITY_AMD 2 +static CPUCacheInfo l1d_cache_amd = { + .type = DCACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, +}; /* L1 instruction cache: */ -#define L1I_LINE_SIZE 64 -#define L1I_ASSOCIATIVITY 8 -#define L1I_SETS 64 -#define L1I_PARTITIONS 1 -/* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 32KiB */ -#define L1I_DESCRIPTOR CPUID_2_L1I_32KB_8WAY_64B +static CPUCacheInfo l1i_cache = { + .type = ICACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, +}; + /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -#define L1I_LINES_PER_TAG 1 -#define L1I_SIZE_KB_AMD 64 -#define L1I_ASSOCIATIVITY_AMD 2 +static CPUCacheInfo l1i_cache_amd = { + .type = ICACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, +}; /* Level 2 unified cache: */ -#define L2_LINE_SIZE 64 -#define L2_ASSOCIATIVITY 16 -#define L2_SETS 4096 -#define L2_PARTITIONS 1 -/* Size = LINE_SIZE*ASSOCIATIVITY*SETS*PARTITIONS = 4MiB */ +static CPUCacheInfo l2_cache = { + .type = UNIFIED_CACHE, + .level = 2, + .size = 4 * MiB, + .self_init = 1, + .line_size = 64, + .associativity = 16, + .sets = 4096, + .partitions = 1, + .no_invd_sharing = true, +}; + /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ -#define L2_DESCRIPTOR CPUID_2_L2_2MB_8WAY_64B +static CPUCacheInfo l2_cache_cpuid2 = { + .type = UNIFIED_CACHE, + .level = 2, + .size = 2 * MiB, + .line_size = 64, + .associativity = 8, +}; + + /*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */ -#define L2_LINES_PER_TAG 1 -#define L2_SIZE_KB_AMD 512 +static CPUCacheInfo l2_cache_amd = { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .lines_per_tag = 1, + .associativity = 16, + .sets = 512, + .partitions = 1, +}; /* Level 3 unified cache: */ -#define L3_SIZE_KB 0 /* disabled */ -#define L3_ASSOCIATIVITY 0 /* disabled */ -#define L3_LINES_PER_TAG 0 /* disabled */ -#define L3_LINE_SIZE 0 /* disabled */ -#define L3_N_LINE_SIZE 64 -#define L3_N_ASSOCIATIVITY 16 -#define L3_N_SETS 16384 -#define L3_N_PARTITIONS 1 -#define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B -#define L3_N_LINES_PER_TAG 1 -#define L3_N_SIZE_KB_AMD 16384 +static CPUCacheInfo l3_cache = { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, +}; /* TLB definitions: */ @@ -3344,85 +3634,53 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (!cpu->enable_l3_cache) { *ecx = 0; } else { - *ecx = L3_N_DESCRIPTOR; + *ecx = cpuid2_cache_descriptor(&l3_cache); } - *edx = (L1D_DESCRIPTOR << 16) | \ - (L1I_DESCRIPTOR << 8) | \ - (L2_DESCRIPTOR); + *edx = (cpuid2_cache_descriptor(&l1d_cache) << 16) | + (cpuid2_cache_descriptor(&l1i_cache) << 8) | + (cpuid2_cache_descriptor(&l2_cache_cpuid2)); break; case 4: /* cache info: needed for Core compatibility */ if (cpu->cache_info_passthrough) { host_cpuid(index, count, eax, ebx, ecx, edx); + /* QEMU gives out its own APIC IDs, never pass down bits 31..26. */ *eax &= ~0xFC000000; + if ((*eax & 31) && cs->nr_cores > 1) { + *eax |= (cs->nr_cores - 1) << 26; + } } else { *eax = 0; switch (count) { case 0: /* L1 dcache info */ - *eax |= CPUID_4_TYPE_DCACHE | \ - CPUID_4_LEVEL(1) | \ - CPUID_4_SELF_INIT_LEVEL; - *ebx = (L1D_LINE_SIZE - 1) | \ - ((L1D_PARTITIONS - 1) << 12) | \ - ((L1D_ASSOCIATIVITY - 1) << 22); - *ecx = L1D_SETS - 1; - *edx = CPUID_4_NO_INVD_SHARING; + encode_cache_cpuid4(&l1d_cache, + 1, cs->nr_cores, + eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - *eax |= CPUID_4_TYPE_ICACHE | \ - CPUID_4_LEVEL(1) | \ - CPUID_4_SELF_INIT_LEVEL; - *ebx = (L1I_LINE_SIZE - 1) | \ - ((L1I_PARTITIONS - 1) << 12) | \ - ((L1I_ASSOCIATIVITY - 1) << 22); - *ecx = L1I_SETS - 1; - *edx = CPUID_4_NO_INVD_SHARING; + encode_cache_cpuid4(&l1i_cache, + 1, cs->nr_cores, + eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - *eax |= CPUID_4_TYPE_UNIFIED | \ - CPUID_4_LEVEL(2) | \ - CPUID_4_SELF_INIT_LEVEL; - if (cs->nr_threads > 1) { - *eax |= (cs->nr_threads - 1) << 14; - } - *ebx = (L2_LINE_SIZE - 1) | \ - ((L2_PARTITIONS - 1) << 12) | \ - ((L2_ASSOCIATIVITY - 1) << 22); - *ecx = L2_SETS - 1; - *edx = CPUID_4_NO_INVD_SHARING; + encode_cache_cpuid4(&l2_cache, + cs->nr_threads, cs->nr_cores, + eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - if (!cpu->enable_l3_cache) { - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; + pkg_offset = apicid_pkg_offset(cs->nr_cores, cs->nr_threads); + if (cpu->enable_l3_cache) { + encode_cache_cpuid4(&l3_cache, + (1 << pkg_offset), cs->nr_cores, + eax, ebx, ecx, edx); break; } - *eax |= CPUID_4_TYPE_UNIFIED | \ - CPUID_4_LEVEL(3) | \ - CPUID_4_SELF_INIT_LEVEL; - pkg_offset = apicid_pkg_offset(cs->nr_cores, cs->nr_threads); - *eax |= ((1 << pkg_offset) - 1) << 14; - *ebx = (L3_N_LINE_SIZE - 1) | \ - ((L3_N_PARTITIONS - 1) << 12) | \ - ((L3_N_ASSOCIATIVITY - 1) << 22); - *ecx = L3_N_SETS - 1; - *edx = CPUID_4_INCLUSIVE | CPUID_4_COMPLEX_IDX; - break; + /* fall through */ default: /* end of info */ - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; + *eax = *ebx = *ecx = *edx = 0; break; } } - - /* QEMU gives out its own APIC IDs, never pass down bits 31..26. */ - if ((*eax & 31) && cs->nr_cores > 1) { - *eax |= (cs->nr_cores - 1) << 26; - } break; case 5: /* mwait info: needed for Core compatibility */ @@ -3626,10 +3884,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | \ (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); - *ecx = (L1D_SIZE_KB_AMD << 24) | (L1D_ASSOCIATIVITY_AMD << 16) | \ - (L1D_LINES_PER_TAG << 8) | (L1D_LINE_SIZE); - *edx = (L1I_SIZE_KB_AMD << 24) | (L1I_ASSOCIATIVITY_AMD << 16) | \ - (L1I_LINES_PER_TAG << 8) | (L1I_LINE_SIZE); + *ecx = encode_cache_cpuid80000005(&l1d_cache_amd); + *edx = encode_cache_cpuid80000005(&l1i_cache_amd); break; case 0x80000006: /* cache info (L2 cache) */ @@ -3645,18 +3901,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, (L2_DTLB_4K_ENTRIES << 16) | \ (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | \ (L2_ITLB_4K_ENTRIES); - *ecx = (L2_SIZE_KB_AMD << 16) | \ - (AMD_ENC_ASSOC(L2_ASSOCIATIVITY) << 12) | \ - (L2_LINES_PER_TAG << 8) | (L2_LINE_SIZE); - if (!cpu->enable_l3_cache) { - *edx = ((L3_SIZE_KB / 512) << 18) | \ - (AMD_ENC_ASSOC(L3_ASSOCIATIVITY) << 12) | \ - (L3_LINES_PER_TAG << 8) | (L3_LINE_SIZE); - } else { - *edx = ((L3_N_SIZE_KB_AMD / 512) << 18) | \ - (AMD_ENC_ASSOC(L3_N_ASSOCIATIVITY) << 12) | \ - (L3_N_LINES_PER_TAG << 8) | (L3_N_LINE_SIZE); - } + encode_cache_cpuid80000006(&l2_cache_amd, + cpu->enable_l3_cache ? &l3_cache : NULL, + ecx, edx); break; case 0x80000007: *eax = 0; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8fbe1537c1..512c69dddd 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1045,6 +1045,59 @@ typedef enum TPRAccess { TPR_ACCESS_WRITE, } TPRAccess; +/* Cache information data structures: */ + +enum CacheType { + DCACHE, + ICACHE, + UNIFIED_CACHE +}; + +typedef struct CPUCacheInfo { + enum CacheType type; + uint8_t level; + /* Size in bytes */ + uint32_t size; + /* Line size, in bytes */ + uint16_t line_size; + /* + * Associativity. + * Note: representation of fully-associative caches is not implemented + */ + uint8_t associativity; + /* Physical line partitions. CPUID[0x8000001D].EBX, CPUID[4].EBX */ + uint8_t partitions; + /* Number of sets. CPUID[0x8000001D].ECX, CPUID[4].ECX */ + uint32_t sets; + /* + * Lines per tag. + * AMD-specific: CPUID[0x80000005], CPUID[0x80000006]. + * (Is this synonym to @partitions?) + */ + uint8_t lines_per_tag; + + /* Self-initializing cache */ + bool self_init; + /* + * WBINVD/INVD is not guaranteed to act upon lower level caches of + * non-originating threads sharing this cache. + * CPUID[4].EDX[bit 0], CPUID[0x8000001D].EDX[bit 0] + */ + bool no_invd_sharing; + /* + * Cache is inclusive of lower cache levels. + * CPUID[4].EDX[bit 1], CPUID[0x8000001D].EDX[bit 1]. + */ + bool inclusive; + /* + * A complex function is used to index the cache, potentially using all + * address bits. CPUID[4].EDX[bit 2]. + */ + bool complex_indexing; +} CPUCacheInfo; + + + typedef struct CPUX86State { /* standard registers */ target_ulong regs[CPU_NB_REGS]; From 6aaeb05492ef668f415324f43e7d875c0f1e90b3 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 10 May 2018 15:41:42 -0500 Subject: [PATCH 4/7] i386: Add cache information in X86CPUDefinition Add cache information in X86CPUDefinition and CPUX86State. Signed-off-by: Babu Moger Tested-by: Geoffrey McRae Reviewed-by: Eduardo Habkost Message-Id: <20180510204148.11687-3-babu.moger@amd.com> Signed-off-by: Eduardo Habkost --- target/i386/cpu.c | 1 + target/i386/cpu.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 28bb93990e..55685ed19d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1106,6 +1106,7 @@ struct X86CPUDefinition { int stepping; FeatureWordArray features; const char *model_id; + CPUCaches *cache_info; }; static X86CPUDefinition builtin_x86_defs[] = { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 512c69dddd..ac94013c4a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1097,6 +1097,12 @@ typedef struct CPUCacheInfo { } CPUCacheInfo; +typedef struct CPUCaches { + CPUCacheInfo l1d_cache; + CPUCacheInfo l1i_cache; + CPUCacheInfo l2_cache; + CPUCacheInfo l3_cache; +} CPUCaches; typedef struct CPUX86State { /* standard registers */ @@ -1286,6 +1292,7 @@ typedef struct CPUX86State { /* Features that were explicitly enabled/disabled */ FeatureWordArray user_features; uint32_t cpuid_model[12]; + CPUCaches *cache_info; /* MTRRs */ uint64_t mtrr_fixed[11]; From fe52acd2a054b97765963a42037f2f886545e30c Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 10 May 2018 15:41:44 -0500 Subject: [PATCH 5/7] i386: Initialize cache information for EPYC family processors Initialize pre-determined cache information for EPYC processors. Signed-off-by: Babu Moger Tested-by: Geoffrey McRae Message-Id: <20180510204148.11687-5-babu.moger@amd.com> Signed-off-by: Eduardo Habkost --- target/i386/cpu.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 55685ed19d..174a8f434b 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1109,6 +1109,56 @@ struct X86CPUDefinition { CPUCaches *cache_info; }; +static CPUCaches epyc_cache_info = { + .l1d_cache = { + .type = DCACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = { + .type = ICACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + }, +}; + static X86CPUDefinition builtin_x86_defs[] = { { .name = "qemu64", @@ -2345,6 +2395,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "AMD EPYC Processor", + .cache_info = &epyc_cache_info, }, { .name = "EPYC-IBPB", @@ -2391,6 +2442,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_6_EAX_ARAT, .xlevel = 0x8000000A, .model_id = "AMD EPYC Processor (with IBPB)", + .cache_info = &epyc_cache_info, }, }; From 968ee4ad25934717b9192dfed6650a6282854e17 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Mon, 14 May 2018 11:41:50 -0500 Subject: [PATCH 6/7] pc: add 2.13 machine types Add pc-q35-2.13 and pc-i440fx-2.13 machine types Signed-off-by: Babu Moger Message-Id: <20180514164156.27034-2-babu.moger@amd.com> Reviewed-by: Eduardo Habkost Signed-off-by: Eduardo Habkost --- hw/i386/pc_piix.c | 15 ++++++++++++--- hw/i386/pc_q35.c | 13 +++++++++++-- include/hw/i386/pc.h | 3 +++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 729a0508aa..e36c7bbb40 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -425,21 +425,30 @@ static void pc_i440fx_machine_options(MachineClass *m) m->default_display = "std"; } -static void pc_i440fx_2_12_machine_options(MachineClass *m) +static void pc_i440fx_2_13_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; } +DEFINE_I440FX_MACHINE(v2_13, "pc-i440fx-2.13", NULL, + pc_i440fx_2_13_machine_options); + +static void pc_i440fx_2_12_machine_options(MachineClass *m) +{ + pc_i440fx_2_13_machine_options(m); + m->is_default = 0; + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); +} + DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL, pc_i440fx_2_12_machine_options); static void pc_i440fx_2_11_machine_options(MachineClass *m) { pc_i440fx_2_12_machine_options(m); - m->is_default = 0; - m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_11); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 9ae916327e..2372457c6a 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -308,12 +308,22 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_2_12_machine_options(MachineClass *m) +static void pc_q35_2_13_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v2_13, "pc-q35-2.13", NULL, + pc_q35_2_13_machine_options); + +static void pc_q35_2_12_machine_options(MachineClass *m) +{ + pc_q35_2_13_machine_options(m); + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); +} + DEFINE_Q35_MACHINE(v2_12, "pc-q35-2.12", NULL, pc_q35_2_12_machine_options); @@ -323,7 +333,6 @@ static void pc_q35_2_11_machine_options(MachineClass *m) pc_q35_2_12_machine_options(m); pcmc->default_nic_model = "e1000"; - m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_11); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 2a98e3ad68..d4fe073cb8 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -296,6 +296,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_2_12 \ + HW_COMPAT_2_12 \ + #define PC_COMPAT_2_11 \ HW_COMPAT_2_11 \ {\ From ab8f992e3e63e91be257e4e343d386dae7be4bcb Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Mon, 14 May 2018 11:41:51 -0500 Subject: [PATCH 7/7] i386: Add new property to control cache info The property legacy-cache will be used to control the cache information. If user passes "-cpu legacy-cache" then older information will be displayed even if the hardware supports new information. Otherwise use the statically loaded cache definitions if available. Renamed the previous cache structures to legacy_*. If there is any change in the cache information, then it needs to be initialized in builtin_x86_defs. Signed-off-by: Babu Moger Tested-by: Geoffrey McRae Message-Id: <20180514164156.27034-3-babu.moger@amd.com> Reviewed-by: Eduardo Habkost Signed-off-by: Eduardo Habkost --- include/hw/i386/pc.h | 5 +++ target/i386/cpu.c | 97 ++++++++++++++++++++++++++++++++------------ target/i386/cpu.h | 5 +++ 3 files changed, 81 insertions(+), 26 deletions(-) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index d4fe073cb8..a0c269fc34 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -298,6 +298,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_12 \ HW_COMPAT_2_12 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "legacy-cache",\ + .value = "on",\ + }, #define PC_COMPAT_2_11 \ HW_COMPAT_2_11 \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 174a8f434b..e5e66a75d4 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -336,10 +336,14 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, } } -/* Definitions of the hardcoded cache entries we expose: */ +/* + * Definitions of the hardcoded cache entries we expose: + * These are legacy cache values. If there is a need to change any + * of these values please use builtin_x86_defs + */ /* L1 data cache: */ -static CPUCacheInfo l1d_cache = { +static CPUCacheInfo legacy_l1d_cache = { .type = DCACHE, .level = 1, .size = 32 * KiB, @@ -352,7 +356,7 @@ static CPUCacheInfo l1d_cache = { }; /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo l1d_cache_amd = { +static CPUCacheInfo legacy_l1d_cache_amd = { .type = DCACHE, .level = 1, .size = 64 * KiB, @@ -366,7 +370,7 @@ static CPUCacheInfo l1d_cache_amd = { }; /* L1 instruction cache: */ -static CPUCacheInfo l1i_cache = { +static CPUCacheInfo legacy_l1i_cache = { .type = ICACHE, .level = 1, .size = 32 * KiB, @@ -379,7 +383,7 @@ static CPUCacheInfo l1i_cache = { }; /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo l1i_cache_amd = { +static CPUCacheInfo legacy_l1i_cache_amd = { .type = ICACHE, .level = 1, .size = 64 * KiB, @@ -393,7 +397,7 @@ static CPUCacheInfo l1i_cache_amd = { }; /* Level 2 unified cache: */ -static CPUCacheInfo l2_cache = { +static CPUCacheInfo legacy_l2_cache = { .type = UNIFIED_CACHE, .level = 2, .size = 4 * MiB, @@ -406,7 +410,7 @@ static CPUCacheInfo l2_cache = { }; /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ -static CPUCacheInfo l2_cache_cpuid2 = { +static CPUCacheInfo legacy_l2_cache_cpuid2 = { .type = UNIFIED_CACHE, .level = 2, .size = 2 * MiB, @@ -416,7 +420,7 @@ static CPUCacheInfo l2_cache_cpuid2 = { /*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo l2_cache_amd = { +static CPUCacheInfo legacy_l2_cache_amd = { .type = UNIFIED_CACHE, .level = 2, .size = 512 * KiB, @@ -428,7 +432,7 @@ static CPUCacheInfo l2_cache_amd = { }; /* Level 3 unified cache: */ -static CPUCacheInfo l3_cache = { +static CPUCacheInfo legacy_l3_cache = { .type = UNIFIED_CACHE, .level = 3, .size = 16 * MiB, @@ -3338,6 +3342,10 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) env->features[w] = def->features[w]; } + /* Store Cache information from the X86CPUDefinition if available */ + env->cache_info = def->cache_info; + cpu->legacy_cache = def->cache_info ? 0 : 1; + /* Special cases not set in the X86CPUDefinition structs: */ /* TODO: in-kernel irqchip for hvf */ if (kvm_enabled()) { @@ -3687,11 +3695,21 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (!cpu->enable_l3_cache) { *ecx = 0; } else { - *ecx = cpuid2_cache_descriptor(&l3_cache); + if (env->cache_info && !cpu->legacy_cache) { + *ecx = cpuid2_cache_descriptor(&env->cache_info->l3_cache); + } else { + *ecx = cpuid2_cache_descriptor(&legacy_l3_cache); + } + } + if (env->cache_info && !cpu->legacy_cache) { + *edx = (cpuid2_cache_descriptor(&env->cache_info->l1d_cache) << 16) | + (cpuid2_cache_descriptor(&env->cache_info->l1i_cache) << 8) | + (cpuid2_cache_descriptor(&env->cache_info->l2_cache)); + } else { + *edx = (cpuid2_cache_descriptor(&legacy_l1d_cache) << 16) | + (cpuid2_cache_descriptor(&legacy_l1i_cache) << 8) | + (cpuid2_cache_descriptor(&legacy_l2_cache_cpuid2)); } - *edx = (cpuid2_cache_descriptor(&l1d_cache) << 16) | - (cpuid2_cache_descriptor(&l1i_cache) << 8) | - (cpuid2_cache_descriptor(&l2_cache_cpuid2)); break; case 4: /* cache info: needed for Core compatibility */ @@ -3704,27 +3722,35 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } } else { *eax = 0; + CPUCacheInfo *l1d, *l1i, *l2, *l3; + if (env->cache_info && !cpu->legacy_cache) { + l1d = &env->cache_info->l1d_cache; + l1i = &env->cache_info->l1i_cache; + l2 = &env->cache_info->l2_cache; + l3 = &env->cache_info->l3_cache; + } else { + l1d = &legacy_l1d_cache; + l1i = &legacy_l1i_cache; + l2 = &legacy_l2_cache; + l3 = &legacy_l3_cache; + } switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid4(&l1d_cache, - 1, cs->nr_cores, + encode_cache_cpuid4(l1d, 1, cs->nr_cores, eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - encode_cache_cpuid4(&l1i_cache, - 1, cs->nr_cores, + encode_cache_cpuid4(l1i, 1, cs->nr_cores, eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - encode_cache_cpuid4(&l2_cache, - cs->nr_threads, cs->nr_cores, + encode_cache_cpuid4(l2, cs->nr_threads, cs->nr_cores, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ pkg_offset = apicid_pkg_offset(cs->nr_cores, cs->nr_threads); if (cpu->enable_l3_cache) { - encode_cache_cpuid4(&l3_cache, - (1 << pkg_offset), cs->nr_cores, + encode_cache_cpuid4(l3, (1 << pkg_offset), cs->nr_cores, eax, ebx, ecx, edx); break; } @@ -3937,8 +3963,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | \ (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); - *ecx = encode_cache_cpuid80000005(&l1d_cache_amd); - *edx = encode_cache_cpuid80000005(&l1i_cache_amd); + if (env->cache_info && !cpu->legacy_cache) { + *ecx = encode_cache_cpuid80000005(&env->cache_info->l1d_cache); + *edx = encode_cache_cpuid80000005(&env->cache_info->l1i_cache); + } else { + *ecx = encode_cache_cpuid80000005(&legacy_l1d_cache_amd); + *edx = encode_cache_cpuid80000005(&legacy_l1i_cache_amd); + } break; case 0x80000006: /* cache info (L2 cache) */ @@ -3954,9 +3985,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, (L2_DTLB_4K_ENTRIES << 16) | \ (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | \ (L2_ITLB_4K_ENTRIES); - encode_cache_cpuid80000006(&l2_cache_amd, - cpu->enable_l3_cache ? &l3_cache : NULL, - ecx, edx); + if (env->cache_info && !cpu->legacy_cache) { + encode_cache_cpuid80000006(&env->cache_info->l2_cache, + cpu->enable_l3_cache ? + &env->cache_info->l3_cache : NULL, + ecx, edx); + } else { + encode_cache_cpuid80000006(&legacy_l2_cache_amd, + cpu->enable_l3_cache ? + &legacy_l3_cache : NULL, + ecx, edx); + } break; case 0x80000007: *eax = 0; @@ -5135,6 +5174,12 @@ static Property x86_cpu_properties[] = { false), DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true), DEFINE_PROP_BOOL("tcg-cpuid", X86CPU, expose_tcg, true), + /* + * lecacy_cache defaults to CPU model being chosen. This is set in + * x86_cpu_load_def based on cache_info which is initialized in + * builtin_x86_defs + */ + DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, false), /* * From "Requirements for Implementing the Microsoft diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ac94013c4a..8bc54d70bf 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1399,6 +1399,11 @@ struct X86CPU { */ bool enable_l3_cache; + /* Compatibility bits for old machine types. + * If true present the old cache topology information + */ + bool legacy_cache; + /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb;