diff --git a/src/arm/midr.c b/src/arm/midr.c
index 1ceae14..2d8d7bc 100644
--- a/src/arm/midr.c
+++ b/src/arm/midr.c
@@ -81,12 +81,23 @@
   }
 
   int64_t flops = 0;
 
   ptr = cpu;
-  for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
-    flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
-  }
-  if(cpu->feat->NEON) flops = flops * 4;
+  if(cpu->cpu_vendor == SOC_VENDOR_APPLE) {
+    // Special case for M1/M2
+    // First we find the E cores, then the P
+    // M1 have 2 (E cores) or 4 (P cores) FMA units
+    // Source: https://dougallj.github.io/applecpu/firestorm-simd.html
+    flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 2;
+    ptr = ptr->next_cpu;
+    flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 4;
+  }
+  else {
+    for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
+      flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
+    }
+    if(cpu->feat->NEON) flops = flops * 4;
+  }
 
   return flops;
 }
@@ -273,11 +284,50 @@
   fire->next_cpu = NULL;
 }
 
+// Fills the cpuInfo list for an Apple M2 (Avalanche + Blizzard).
+// The node passed in (cpu) describes the Blizzard (little) cores and a
+// second node, allocated here, describes the Avalanche (big) cores.
+// pcores / ecores: number of performance (big) / efficiency (little) cores.
+void fill_cpu_info_avalanche_blizzard(struct cpuInfo* cpu, uint32_t pcores, uint32_t ecores) {
+  // 1. Fill BLIZZARD (little cores, so it takes ecores)
+  struct cpuInfo* bli = cpu;
+
+  bli->midr = MIDR_APPLE_M2_BLIZZARD;
+  bli->arch = get_uarch_from_midr(bli->midr, bli);
+  bli->cach = get_cache_info(bli);
+  bli->feat = get_features_info();
+  bli->topo = malloc(sizeof(struct topology));
+  bli->topo->cach = bli->cach;
+  bli->topo->total_cores = ecores;
+  bli->freq = malloc(sizeof(struct frequency));
+  bli->freq->base = UNKNOWN_DATA;
+  bli->freq->max = 2800;
+  bli->hv = malloc(sizeof(struct hypervisor));
+  bli->hv->present = false;
+  bli->next_cpu = malloc(sizeof(struct cpuInfo));
+
+  // 2. Fill AVALANCHE (big cores, so it takes pcores)
+  struct cpuInfo* ava = bli->next_cpu;
+  ava->midr = MIDR_APPLE_M2_AVALANCHE;
+  ava->arch = get_uarch_from_midr(ava->midr, ava);
+  ava->cach = get_cache_info(ava);
+  ava->feat = get_features_info();
+  ava->topo = malloc(sizeof(struct topology));
+  ava->topo->cach = ava->cach;
+  ava->topo->total_cores = pcores;
+  ava->freq = malloc(sizeof(struct frequency));
+  ava->freq->base = UNKNOWN_DATA;
+  ava->freq->max = 3500;
+  ava->hv = malloc(sizeof(struct hypervisor));
+  ava->hv->present = false;
+  ava->next_cpu = NULL;
+}
+
 struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
   uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
 
-  // Manually fill the cpuInfo assuming that the CPU
-  // is a ARM_FIRESTORM_ICESTORM (Apple M1)
+  // Manually fill the cpuInfo assuming that
+  // the CPU is an Apple M1/M2
   if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
     cpu->num_cpus = 2;
     // Now detect the M1 version
@@ -287,18 +337,32 @@
       fill_cpu_info_firestorm_icestorm(cpu, 4, 4);
     }
     else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS || cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) {
-      // Apple M1 Pro/Max. Detect number of cores
+      // Apple M1 Pro/Max/Ultra. Detect number of cores
       uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
-      if(physicalcpu < 8 || physicalcpu > 10) {
-        printBug("Found invalid physicalcpu: 0x%.8X", physicalcpu);
+      if(physicalcpu == 20) {
+        // M1 Ultra
+        fill_cpu_info_firestorm_icestorm(cpu, 16, 4);
+      }
+      else if(physicalcpu == 8 || physicalcpu == 10) {
+        // M1 Pro/Max
+        fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2);
+      }
+      else {
+        printBug("Found invalid physical cpu number: %d", physicalcpu);
         return NULL;
       }
-      fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2);
     }
     else {
       printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
       return NULL;
     }
+    cpu->soc = get_soc();
+    cpu->peak_performance = get_peak_performance(cpu);
+  }
+  else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) {
+    // Just the "normal" M2 exists for now
+    cpu->num_cpus = 2;
+    fill_cpu_info_avalanche_blizzard(cpu, 4, 4);
     cpu->soc = get_soc();
     cpu->peak_performance = get_peak_performance(cpu);
   }
diff --git a/src/arm/soc.c b/src/arm/soc.c
index 27a6d15..28b6e63 100644
--- a/src/arm/soc.c
+++ b/src/arm/soc.c
@@ -648,15 +648,49 @@
 
 #if defined(__APPLE__) || defined(__MACH__)
 struct system_on_chip* guess_soc_apple(struct system_on_chip* soc) {
+  uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
   uint32_t cpu_subfamily = get_sys_info_by_name("hw.cpusubfamily");
-  if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
-    fill_soc(soc, "M1", SOC_APPLE_M1, 5);
-  }
-  else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS) {
-    fill_soc(soc, "M1 Pro", SOC_APPLE_M1_PRO, 5);
-  }
-  else if(cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) {
-    fill_soc(soc, "M1 Max", SOC_APPLE_M1_MAX, 5);
+
+  if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
+    // Check M1 version
+    if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
+      fill_soc(soc, "M1", SOC_APPLE_M1, 5);
+    }
+    else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS) {
+      fill_soc(soc, "M1 Pro", SOC_APPLE_M1_PRO, 5);
+    }
+    else if(cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) {
+      // Could be M1 Max or M1 Ultra (2x M1 Max)
+      uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
+      if(physicalcpu == 20) {
+        fill_soc(soc, "M1 Ultra", SOC_APPLE_M1_ULTRA, 5);
+      }
+      else if(physicalcpu == 10) {
+        fill_soc(soc, "M1 Max", SOC_APPLE_M1_MAX, 5);
+      }
+      else {
+        printBug("Found invalid physical cpu number: %d", physicalcpu);
+        soc->soc_vendor = SOC_VENDOR_UNKNOWN;
+      }
+    }
+    else {
+      printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
+      soc->soc_vendor = SOC_VENDOR_UNKNOWN;
+    }
+  }
+  else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) {
+    // Check M2 version
+    if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
+      fill_soc(soc, "M2", SOC_APPLE_M2, 5);
+    }
+    else {
+      printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
+      soc->soc_vendor = SOC_VENDOR_UNKNOWN;
+    }
+  }
+  else {
+    printBug("Found invalid cpu_family: 0x%.8X", cpu_family);
+    soc->soc_vendor = SOC_VENDOR_UNKNOWN;
   }
   return soc;
 }
diff --git a/src/arm/socs.h b/src/arm/socs.h
index 7632bc3..e50e932 100644
--- a/src/arm/socs.h
+++ b/src/arm/socs.h
@@ -256,6 +256,8 @@
   SOC_APPLE_M1,
   SOC_APPLE_M1_PRO,
   SOC_APPLE_M1_MAX,
+  SOC_APPLE_M1_ULTRA,
+  SOC_APPLE_M2,
   // ALLWINNER
   SOC_ALLWINNER_A10,
   SOC_ALLWINNER_A13,
@@ -288,7 +290,7 @@
   else if(soc >= SOC_EXYNOS_3475 && soc <= SOC_EXYNOS_880) return SOC_VENDOR_EXYNOS;
   else if(soc >= SOC_MTK_MT6893 && soc <= SOC_MTK_MT8783) return SOC_VENDOR_MEDIATEK;
   else if(soc >= SOC_SNAPD_QSD8650 && soc <= SOC_SNAPD_SM8350) return SOC_VENDOR_SNAPDRAGON;
-  else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M1_MAX) return SOC_VENDOR_APPLE;
+  else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M2) return SOC_VENDOR_APPLE;
   else if(soc >= SOC_ALLWINNER_A10 && soc <= SOC_ALLWINNER_R328) return SOC_VENDOR_ALLWINNER;
   return SOC_VENDOR_UNKNOWN;
 }
diff --git a/src/arm/sysctl.h b/src/arm/sysctl.h
index 716c143..687c377 100644
--- a/src/arm/sysctl.h
+++ b/src/arm/sysctl.h
@@ -4,9 +4,23 @@
 // From Linux kernel: arch/arm64/include/asm/cputype.h
 #define MIDR_APPLE_M1_ICESTORM 0x610F0220
 #define MIDR_APPLE_M1_FIRESTORM 0x610F0230
+// Kernel does not include those, so I just assume that
+// APPLE_CPU_PART_M2_BLIZZARD=0x30,M2_AVALANCHE=0x31
+#define MIDR_APPLE_M2_BLIZZARD 0x610F0300
+#define MIDR_APPLE_M2_AVALANCHE 0x610F0310
+
+// M1 / A14
 #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
   #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
 #endif
+// M2 / A15
+#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD
+  #define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D
+#endif
+
+// For detecting different M1 types
+// NOTE: Could also be achieved detecting different
+// MIDR values (e.g., APPLE_CPU_PART_M1_ICESTORM_PRO)
 #ifndef CPUSUBFAMILY_ARM_HG
   #define CPUSUBFAMILY_ARM_HG 2
 #endif
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index 44a2464..093e3e9 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -33,6 +33,7 @@
   ISA_ARMv8_2_A,
   ISA_ARMv8_3_A,
   ISA_ARMv8_4_A,
+  ISA_ARMv8_5_A
 };
 
 enum {
@@ -95,6 +96,8 @@
   UARCH_THUNDER, // Apple A13 processor (little cores).
   UARCH_ICESTORM, // Apple M1 processor (little cores).
   UARCH_FIRESTORM, // Apple M1 processor (big cores).
+  UARCH_BLIZZARD, // Apple M2 processor (little cores).
+  UARCH_AVALANCHE, // Apple M2 processor (big cores).
   // CAVIUM
   UARCH_THUNDERX, // Cavium ThunderX
   UARCH_THUNDERX2, // Cavium ThunderX2 (originally Broadcom Vulkan).
@@ -155,8 +158,10 @@
   [UARCH_EXYNOS_M3] = ISA_ARMv8_A,
   [UARCH_EXYNOS_M4] = ISA_ARMv8_2_A,
   [UARCH_EXYNOS_M5] = ISA_ARMv8_2_A,
-  [UARCH_ICESTORM] = ISA_ARMv8_4_A,
-  [UARCH_FIRESTORM] = ISA_ARMv8_4_A,
+  [UARCH_ICESTORM] = ISA_ARMv8_5_A, // https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Support/AArch64TargetParser.def
+  [UARCH_FIRESTORM] = ISA_ARMv8_5_A,
+  [UARCH_BLIZZARD] = ISA_ARMv8_5_A, // Not confirmed
+  [UARCH_AVALANCHE] = ISA_ARMv8_5_A,
   [UARCH_PJ4] = ISA_ARMv7_A,
   [UARCH_XIAOMI] = ISA_ARMv8_A,
 };
@@ -172,7 +177,8 @@
   [ISA_ARMv8_1_A] = "ARMv8.1",
   [ISA_ARMv8_2_A] = "ARMv8.2",
   [ISA_ARMv8_3_A] = "ARMv8.3",
-  [ISA_ARMv8_4_A] = "ARMv8.4"
+  [ISA_ARMv8_4_A] = "ARMv8.4",
+  [ISA_ARMv8_5_A] = "ARMv8.5"
 };
 
 #define UARCH_START if (false) {}
@@ -297,6 +303,8 @@
 
   CHECK_UARCH(arch, cpu, 'a', 0x022, NA, NA, "Icestorm", UARCH_ICESTORM, CPU_VENDOR_APPLE)
   CHECK_UARCH(arch, cpu, 'a', 0x023, NA, NA, "Firestorm", UARCH_FIRESTORM, CPU_VENDOR_APPLE)
+  CHECK_UARCH(arch, cpu, 'a', 0x030, NA, NA, "Blizzard", UARCH_BLIZZARD, CPU_VENDOR_APPLE)
+  CHECK_UARCH(arch, cpu, 'a', 0x031, NA, NA, "Avalanche", UARCH_AVALANCHE, CPU_VENDOR_APPLE)
 
   CHECK_UARCH(arch, cpu, 'V', 0x581, NA, NA, "PJ4", UARCH_PJ4, CPU_VENDOR_MARVELL)
   CHECK_UARCH(arch, cpu, 'V', 0x584, NA, NA, "PJ4B-MP", UARCH_PJ4, CPU_VENDOR_MARVELL)
diff --git a/src/common/udev.c b/src/common/udev.c
index 2935e61..a759f26 100644
--- a/src/common/udev.c
+++ b/src/common/udev.c
@@ -166,7 +166,6 @@
 }
 
 int get_num_caches_from_files(char** paths, int num_paths) {
-  int SHARED_MAP_MAX_LEN = 8 + 1;
   int filelen;
   char* buf;
   uint32_t* shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
@@ -175,11 +174,6 @@
   for(int i=0; i < num_paths; i++) {
     if((buf = read_file(paths[i], &filelen)) == NULL) {
       printWarn("Could not open '%s'", paths[i]);
-      return -1;
-    }
-
-    if(filelen > SHARED_MAP_MAX_LEN) {
-      printBug("Shared map length is %d while the max is be %d", filelen, SHARED_MAP_MAX_LEN);
       return -1;
     }
 
diff --git a/src/x86/uarch.c b/src/x86/uarch.c
index 118d056..489e1a8 100644
--- a/src/x86/uarch.c
+++ b/src/x86/uarch.c
@@ -79,7 +79,7 @@
   UARCH_GOLDMONT_PLUS,
   UARCH_TREMONT,
   UARCH_LAKEMONT,
-  UARCH_COFFE_LAKE,
+  UARCH_COFFEE_LAKE,
   UARCH_ITANIUM,
   UARCH_KNIGHTS_FERRY,
   UARCH_KNIGHTS_CORNER,
@@ -225,7 +225,7 @@
   CHECK_UARCH(arch, 0, 6, 8, 12, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64
   CHECK_UARCH(arch, 0, 6, 8, 13, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64
   // CHECK_UARCH(arch, 0, 6, 8, 14, 9, ...) It is not possible to determine uarch only from CPUID dump (can be Kaby Lake or Amber Lake)
-  CHECK_UARCH(arch, 0, 6, 8, 14, 10, "Kaby Lake", UARCH_KABY_LAKE, 14) // wikichip
+  CHECK_UARCH(arch, 0, 6, 8, 14, 10, "Coffee Lake", UARCH_COFFEE_LAKE, 14) // wikichip
   CHECK_UARCH(arch, 0, 6, 8, 14, 11, "Whiskey Lake", UARCH_WHISKEY_LAKE, 14) // wikichip
   CHECK_UARCH(arch, 0, 6, 8, 14, 12, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip
   CHECK_UARCH(arch, 0, 6, 9, 6, NA, "Tremont", UARCH_TREMONT, 10) // LX*
@@ -234,10 +234,10 @@
   CHECK_UARCH(arch, 0, 6, 9, 12, NA, "Tremont", UARCH_TREMONT, 10) // LX*
   CHECK_UARCH(arch, 0, 6, 9, 13, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // LX*
   CHECK_UARCH(arch, 0, 6, 9, 14, 9, "Kaby Lake", UARCH_KABY_LAKE, 14)
-  CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFE_LAKE, 14)
-  CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFE_LAKE, 14)
-  CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFE_LAKE, 14)
-  CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFE_LAKE, 14)
+  CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
+  CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
+  CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
+  CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
   CHECK_UARCH(arch, 0, 6, 10, 5, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip
   CHECK_UARCH(arch, 0, 6, 10, 6, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // instlatx64.atw.hu (i7-10710U)
   CHECK_UARCH(arch, 0, 6, 10, 7, NA, "Rocket Lake", UARCH_ROCKET_LAKE, 14) // instlatx64.atw.hu (i7-11700K)
@@ -358,6 +358,7 @@
   CHECK_UARCH(arch, 8, 15, 6, 0, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, geekbench.com example
   CHECK_UARCH(arch, 8, 15, 6, 8, NA, "Zen 2", UARCH_ZEN2, 7) // found on instlatx64
   CHECK_UARCH(arch, 8, 15, 7, 1, NA, "Zen 2", UARCH_ZEN2, 7) // samples from Steven Noonan and instlatx64
+  CHECK_UARCH(arch, 8, 15, 9, 0, 2, "Zen 2", UARCH_ZEN2, 7) // Steam Deck (instlatx64)
   CHECK_UARCH(arch, 10, 15, 0, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
   CHECK_UARCH(arch, 10, 15, 2, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
   CHECK_UARCH(arch, 10, 15, 5, 0, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
@@ -408,7 +409,7 @@
     case UARCH_ROCKET_LAKE:
     case UARCH_AMBER_LAKE:
     case UARCH_WHISKEY_LAKE:
-    case UARCH_COFFE_LAKE:
+    case UARCH_COFFEE_LAKE:
     case UARCH_PALM_COVE:
     case UARCH_KNIGHTS_LANDING: