From 36cc64a9b3f1744e7a030248bb81526e9f37f3d6 Mon Sep 17 00:00:00 2001
From: Pawel Dziepak
Date: Wed, 2 Oct 2013 23:48:03 +0200
Subject: [PATCH] x86[_64]: Add CPU cache topology detection for AMD and Intel CPUs

---
 headers/private/kernel/arch/x86/arch_cpu.h |   8 ++
 headers/private/kernel/cpu.h               |   1 +
 src/system/kernel/arch/x86/arch_cpu.cpp    | 144 ++++++++++++++++++++-
 3 files changed, 148 insertions(+), 5 deletions(-)

diff --git a/headers/private/kernel/arch/x86/arch_cpu.h b/headers/private/kernel/arch/x86/arch_cpu.h
index 4809528c57..c51ee6f2fd 100644
--- a/headers/private/kernel/arch/x86/arch_cpu.h
+++ b/headers/private/kernel/arch/x86/arch_cpu.h
@@ -24,6 +24,9 @@
 #endif	// !_ASSEMBLER
 
 
+#define CPU_MAX_CACHE_LEVEL	8
+
+
 // MSR registers (possibly Intel specific)
 #define IA32_MSR_TSC					0x10
 #define IA32_MSR_APIC_BASE				0x1b
@@ -152,6 +155,10 @@
 #define IA32_FEATURE_EXT_RDRND			(1 << 30) // RDRAND instruction
 #define IA32_FEATURE_EXT_HYPERVISOR		(1 << 31) // Running on a hypervisor
 
+// x86 features from cpuid eax 0x80000001, ecx register (AMD)
+#define IA32_FEATURE_AMD_EXT_CMPLEGACY	(1 << 1) // Core MP legacy mode
+#define IA32_FEATURE_AMD_EXT_TOPOLOGY	(1 << 22) // Topology extensions
+
 // x86 features from cpuid eax 0x80000001, edx register (AMD)
 // only care about the ones that are unique to this register
 #define IA32_FEATURE_AMD_EXT_SYSCALL	(1 << 11) // SYSCALL/SYSRET
@@ -265,6 +272,7 @@ typedef struct x86_cpu_module_info {
 enum x86_feature_type {
 	FEATURE_COMMON = 0,		// cpuid eax=1, ecx register
 	FEATURE_EXT,			// cpuid eax=1, edx register
+	FEATURE_EXT_AMD_ECX,	// cpuid eax=0x80000001, ecx register (AMD)
 	FEATURE_EXT_AMD,		// cpuid eax=0x80000001, edx register (AMD)
 	FEATURE_6_EAX,			// cpuid eax=6, eax registers
 	FEATURE_6_ECX,			// cpuid eax=6, ecx registers
diff --git a/headers/private/kernel/cpu.h b/headers/private/kernel/cpu.h
index ab2a850d03..2432cce524 100644
--- a/headers/private/kernel/cpu.h
+++ b/headers/private/kernel/cpu.h
@@ -67,6 +67,7 @@ typedef struct cpu_ent {
 
 	// CPU topology information
 	int				topology_id[CPU_TOPOLOGY_LEVELS];
+	int				cache_id[CPU_MAX_CACHE_LEVEL];
 
 	// arch-specific stuff
 	arch_cpu_info	arch;
diff --git a/src/system/kernel/arch/x86/arch_cpu.cpp b/src/system/kernel/arch/x86/arch_cpu.cpp
index da9bc039c5..f1b2d553b5 100644
--- a/src/system/kernel/arch/x86/arch_cpu.cpp
+++ b/src/system/kernel/arch/x86/arch_cpu.cpp
@@ -129,6 +129,10 @@ static uint32 (*sGetCPUTopologyID)(int currentCPU);
 static uint32 sHierarchyMask[CPU_TOPOLOGY_LEVELS];
 static uint32 sHierarchyShift[CPU_TOPOLOGY_LEVELS];
 
+/* Cache topology information */
+static uint32 sCacheSharingMask[CPU_MAX_CACHE_LEVEL];
+static uint32 sCacheLevelCount;
+
 
 static status_t
 acpi_shutdown(bool rebootSystem)
@@ -563,7 +567,8 @@ detectAMDCPUTopology(uint32 maxBasicLeaf, uint32 maxExtendedLeaf)
 
 	if (maxExtendedLeaf >= 0x80000001) {
 		get_current_cpuid(&cpuid, 0x80000001, 0);
-		if ((cpuid.regs.ecx & 2) != 0)
+		if (x86_check_feature(IA32_FEATURE_AMD_EXT_CMPLEGACY,
+				FEATURE_EXT_AMD_ECX))
 			maxCoreID = maxLogicalID;
 	}
 
@@ -573,11 +578,58 @@ detectAMDCPUTopology(uint32 maxBasicLeaf, uint32 maxExtendedLeaf)
 }
 
 
-static uint32
-getIntelCPUInitialx2APICID(int currentCPU)
+/*!	Detects how caches are shared between logical CPUs on AMD processors.
+	Walks the sub-leaves of CPUID leaf 0x8000001d and stores one sharing mask
+	per cache level in sCacheSharingMask; sCacheLevelCount receives the highest
+	level reported. Returns early, leaving both untouched, if the topology
+	extensions feature bit or the leaf itself is not available.
+*/
+static void
+detectAMDCacheTopology(uint32 maxExtendedLeaf)
 {
-	(void)currentCPU;
+	if (!x86_check_feature(IA32_FEATURE_AMD_EXT_TOPOLOGY, FEATURE_EXT_AMD_ECX))
+		return;
 
+	if (maxExtendedLeaf < 0x8000001d)
+		return;
+
+	// Levels that are never reported keep a sharing count of 1, which yields
+	// an all-ones mask below.
+	uint32 hierarchyLevels[CPU_MAX_CACHE_LEVEL];
+	for (int i = 0; i < CPU_MAX_CACHE_LEVEL; i++)
+		hierarchyLevels[i] = 1;
+	int maxCacheLevel = 0;
+
+	for (int currentLevel = 0; ; currentLevel++) {
+		cpuid_info cpuid;
+		get_current_cpuid(&cpuid, 0x8000001d, currentLevel);
+
+		// Cache type 0 terminates the list. Its level field is not meaningful
+		// and must not be used as an array index.
+		if ((cpuid.regs.eax & 0x1f) == 0)
+			break;
+
+		int cacheLevel = (cpuid.regs.eax >> 5) & 0x7;
+		if (cacheLevel == 0)
+			continue;
+
+		// eax[25:14] is a 12 bit wide "sharing count minus one" field
+		int coresCount = nextPowerOf2(((cpuid.regs.eax >> 14) & 0xfff) + 1);
+		hierarchyLevels[cacheLevel - 1]
+			= coresCount * (sHierarchyMask[CPU_TOPOLOGY_SMT] + 1);
+
+		maxCacheLevel = max_c(maxCacheLevel, cacheLevel);
+	}
+
+	for (int i = 0; i < maxCacheLevel; i++)
+		sCacheSharingMask[i] = ~uint32(hierarchyLevels[i] - 1);
+	sCacheLevelCount = maxCacheLevel;
+}
+
+
+static uint32
+getIntelCPUInitialx2APICID(int /* currentCPU */)
+{
 	cpuid_info cpuid;
 	get_current_cpuid(&cpuid, 11, 0);
 	return cpuid.regs.edx;
@@ -655,6 +707,51 @@ detectIntelCPUTopologyLegacy(uint32 maxBasicLeaf)
 }
 
 
+/*!	Detects how caches are shared between logical CPUs on Intel processors.
+	Walks the sub-leaves of CPUID leaf 4 and stores one sharing mask per cache
+	level in sCacheSharingMask; sCacheLevelCount receives the highest level
+	reported. Returns early, leaving both untouched, if leaf 4 is not
+	available.
+*/
+static void
+detectIntelCacheTopology(uint32 maxBasicLeaf)
+{
+	if (maxBasicLeaf < 4)
+		return;
+
+	// Levels that are never reported keep a sharing count of 1, which yields
+	// an all-ones mask below.
+	uint32 hierarchyLevels[CPU_MAX_CACHE_LEVEL];
+	for (int i = 0; i < CPU_MAX_CACHE_LEVEL; i++)
+		hierarchyLevels[i] = 1;
+	int maxCacheLevel = 0;
+
+	for (int currentLevel = 0; ; currentLevel++) {
+		cpuid_info cpuid;
+		get_current_cpuid(&cpuid, 4, currentLevel);
+
+		// Cache type 0 terminates the list. Its level field is not meaningful
+		// and must not be used as an array index.
+		if ((cpuid.regs.eax & 0x1f) == 0)
+			break;
+
+		int cacheLevel = (cpuid.regs.eax >> 5) & 0x7;
+		if (cacheLevel == 0)
+			continue;
+
+		// eax[25:14] is a 12 bit wide "sharing count minus one" field
+		hierarchyLevels[cacheLevel - 1]
+			= nextPowerOf2(((cpuid.regs.eax >> 14) & 0xfff) + 1);
+
+		maxCacheLevel = max_c(maxCacheLevel, cacheLevel);
+	}
+
+	for (int i = 0; i < maxCacheLevel; i++)
+		sCacheSharingMask[i] = ~uint32(hierarchyLevels[i] - 1);
+	sCacheLevelCount = maxCacheLevel;
+}
+
+
 static uint32
 getSimpleCPUTopologyID(int currentCPU)
 {
@@ -675,14 +772,25 @@ detectCPUTopology(int currentCPU, cpu_ent* cpu, uint32 maxBasicLeaf,
 	uint32 maxExtendedLeaf)
 {
 	if (currentCPU == 0) {
+		// Start with all-ones masks; detection overwrites the levels it finds.
+		memset(sCacheSharingMask, 0xff, sizeof(sCacheSharingMask));
+
 		status_t result = B_UNSUPPORTED;
 		if (x86_check_feature(IA32_FEATURE_HTT, FEATURE_COMMON)) {
-			if (cpu->arch.vendor == VENDOR_AMD)
+			if (cpu->arch.vendor == VENDOR_AMD) {
 				result = detectAMDCPUTopology(maxBasicLeaf, maxExtendedLeaf);
+
+				if (result == B_OK)
+					detectAMDCacheTopology(maxExtendedLeaf);
+			}
+
 			if (cpu->arch.vendor == VENDOR_INTEL) {
 				result = detectIntelCPUTopologyx2APIC(maxBasicLeaf);
 				if (result != B_OK)
 					result = detectIntelCPUTopologyLegacy(maxBasicLeaf);
+
+				if (result == B_OK)
+					detectIntelCacheTopology(maxBasicLeaf);
 			}
 		}
 
@@ -707,11 +815,35 @@ detectCPUTopology(int currentCPU, cpu_ent* cpu, uint32 maxBasicLeaf,
 	cpu->topology_id[CPU_TOPOLOGY_PACKAGE]
 		= getTopologyLevelID(topologyID, CPU_TOPOLOGY_PACKAGE);
 
+	// Logical CPUs sharing the cache of a level get the same cache_id there;
+	// levels that were not detected are marked with -1.
+	int i;
+	for (i = 0; i < sCacheLevelCount; i++)
+		cpu->cache_id[i] = topologyID & sCacheSharingMask[i];
+	for (; i < CPU_MAX_CACHE_LEVEL; i++)
+		cpu->cache_id[i] = -1;
+
 #if DUMP_CPU_TOPOLOGY
 	dprintf("CPU %d: apic id %d, package %d, core %d, smt %d\n", currentCPU,
 		topologyID, cpu->topology_id[CPU_TOPOLOGY_PACKAGE],
 		cpu->topology_id[CPU_TOPOLOGY_CORE],
 		cpu->topology_id[CPU_TOPOLOGY_SMT]);
+
+	if (sCacheLevelCount > 0) {
+		char cacheLevels[256];
+		int offset = 0;
+		for (int i = 0; i < sCacheLevelCount; i++) {
+			offset += snprintf(cacheLevels + offset,
+					sizeof(cacheLevels) - offset,
+					" L%d id %d%s", i + 1, cpu->cache_id[i],
+					i < sCacheLevelCount - 1 ? "," : "");
+
+			if (offset >= sizeof(cacheLevels))
+				break;
+		}
+
+		dprintf("CPU %d: cache sharing:%s\n", currentCPU, cacheLevels);
+	}
 #endif
 }
 
@@ -820,6 +952,8 @@ detect_cpu(int currentCPU)
 
 	if (maxExtendedLeaf >= 0x80000001) {
 		get_current_cpuid(&cpuid, 0x80000001, 0);
+		if (cpu->arch.vendor == VENDOR_AMD)
+			cpu->arch.feature[FEATURE_EXT_AMD_ECX] = cpuid.regs.ecx; // ecx
 		cpu->arch.feature[FEATURE_EXT_AMD] = cpuid.regs.edx; // edx
 		if (cpu->arch.vendor != VENDOR_AMD)
 			cpu->arch.feature[FEATURE_EXT_AMD] &= IA32_FEATURES_INTEL_EXT;