Import upstream version 1.02+git20220914.1.9f7204d
Debian Janitor
1 year, 7 months ago
80 | 80 | } |
81 | 81 | |
82 | 82 | int64_t flops = 0; |
83 | ||
84 | 83 | ptr = cpu; |
85 | for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { | |
86 | flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000); | |
87 | } | |
88 | if(cpu->feat->NEON) flops = flops * 4; | |
84 | ||
85 | if(cpu->cpu_vendor == SOC_VENDOR_APPLE) { | |
86 | // Special case for M1/M2 | |
87 | // First we find the E cores, then the P | |
88 | // M1 cores have 2 (E cores) or 4 (P cores) FMA units each | |
89 | // Source: https://dougallj.github.io/applecpu/firestorm-simd.html | |
90 | flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 2; | |
91 | ptr = ptr->next_cpu; | |
92 | flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 4; | |
93 | } | |
94 | else { | |
95 | for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { | |
96 | flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000); | |
97 | } | |
98 | if(cpu->feat->NEON) flops = flops * 4; | |
99 | } | |
89 | 100 | |
90 | 101 | return flops; |
91 | 102 | } |
272 | 283 | fire->next_cpu = NULL; |
273 | 284 | } |
274 | 285 | |
286 | void fill_cpu_info_avalanche_blizzard(struct cpuInfo* cpu, uint32_t pcores, uint32_t ecores) { | |
287 | // 1. Fill BLIZZARD | |
288 | struct cpuInfo* bli = cpu; | |
289 | ||
290 | bli->midr = MIDR_APPLE_M2_BLIZZARD; | |
291 | bli->arch = get_uarch_from_midr(bli->midr, bli); | |
292 | bli->cach = get_cache_info(bli); | |
293 | bli->feat = get_features_info(); | |
294 | bli->topo = malloc(sizeof(struct topology)); | |
295 | bli->topo->cach = bli->cach; | |
296 | bli->topo->total_cores = pcores; | |
297 | bli->freq = malloc(sizeof(struct frequency)); | |
298 | bli->freq->base = UNKNOWN_DATA; | |
299 | bli->freq->max = 2800; | |
300 | bli->hv = malloc(sizeof(struct hypervisor)); | |
301 | bli->hv->present = false; | |
302 | bli->next_cpu = malloc(sizeof(struct cpuInfo)); | |
303 | ||
304 | // 2. Fill AVALANCHE | |
305 | struct cpuInfo* ava = bli->next_cpu; | |
306 | ava->midr = MIDR_APPLE_M2_AVALANCHE; | |
307 | ava->arch = get_uarch_from_midr(ava->midr, ava); | |
308 | ava->cach = get_cache_info(ava); | |
309 | ava->feat = get_features_info(); | |
310 | ava->topo = malloc(sizeof(struct topology)); | |
311 | ava->topo->cach = ava->cach; | |
312 | ava->topo->total_cores = ecores; | |
313 | ava->freq = malloc(sizeof(struct frequency)); | |
314 | ava->freq->base = UNKNOWN_DATA; | |
315 | ava->freq->max = 3500; | |
316 | ava->hv = malloc(sizeof(struct hypervisor)); | |
317 | ava->hv->present = false; | |
318 | ava->next_cpu = NULL; | |
319 | } | |
320 | ||
275 | 321 | struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) { |
276 | 322 | uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); |
277 | 323 | |
278 | // Manually fill the cpuInfo assuming that the CPU | |
279 | // is a ARM_FIRESTORM_ICESTORM (Apple M1) | |
324 | // Manually fill the cpuInfo assuming that | |
325 | // the CPU is an Apple M1/M2 | |
280 | 326 | if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) { |
281 | 327 | cpu->num_cpus = 2; |
282 | 328 | // Now detect the M1 version |
286 | 332 | fill_cpu_info_firestorm_icestorm(cpu, 4, 4); |
287 | 333 | } |
288 | 334 | else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS || cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) { |
289 | // Apple M1 Pro/Max. Detect number of cores | |
335 | // Apple M1 Pro/Max/Ultra. Detect number of cores | |
290 | 336 | uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu"); |
291 | if(physicalcpu < 8 || physicalcpu > 10) { | |
292 | printBug("Found invalid physicalcpu: 0x%.8X", physicalcpu); | |
337 | if(physicalcpu == 20) { | |
338 | // M1 Ultra | |
339 | fill_cpu_info_firestorm_icestorm(cpu, 16, 4); | |
340 | } | |
341 | else if(physicalcpu == 8 || physicalcpu == 10) { | |
342 | // M1 Pro/Max | |
343 | fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2); | |
344 | } | |
345 | else { | |
346 | printBug("Found invalid physical cpu number: %d", physicalcpu); | |
293 | 347 | return NULL; |
294 | 348 | } |
295 | fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2); | |
296 | 349 | } |
297 | 350 | else { |
298 | 351 | printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily); |
299 | 352 | return NULL; |
300 | 353 | } |
354 | cpu->soc = get_soc(); | |
355 | cpu->peak_performance = get_peak_performance(cpu); | |
356 | } | |
357 | else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) { | |
358 | // Just the "normal" M2 exists for now | |
359 | cpu->num_cpus = 2; | |
360 | fill_cpu_info_avalanche_blizzard(cpu, 4, 4); | |
301 | 361 | cpu->soc = get_soc(); |
302 | 362 | cpu->peak_performance = get_peak_performance(cpu); |
303 | 363 | } |
647 | 647 | |
648 | 648 | #if defined(__APPLE__) || defined(__MACH__) |
649 | 649 | struct system_on_chip* guess_soc_apple(struct system_on_chip* soc) { |
650 | uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); | |
650 | 651 | uint32_t cpu_subfamily = get_sys_info_by_name("hw.cpusubfamily"); |
651 | if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) { | |
652 | fill_soc(soc, "M1", SOC_APPLE_M1, 5); | |
653 | } | |
654 | else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS) { | |
655 | fill_soc(soc, "M1 Pro", SOC_APPLE_M1_PRO, 5); | |
656 | } | |
657 | else if(cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) { | |
658 | fill_soc(soc, "M1 Max", SOC_APPLE_M1_MAX, 5); | |
652 | ||
653 | if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) { | |
654 | // Check M1 version | |
655 | if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) { | |
656 | fill_soc(soc, "M1", SOC_APPLE_M1, 5); | |
657 | } | |
658 | else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS) { | |
659 | fill_soc(soc, "M1 Pro", SOC_APPLE_M1_PRO, 5); | |
660 | } | |
661 | else if(cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) { | |
662 | // Could be M1 Max or M1 Ultra (2x M1 Max) | |
663 | uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu"); | |
664 | if(physicalcpu == 20) { | |
665 | fill_soc(soc, "M1 Ultra", SOC_APPLE_M1_ULTRA, 5); | |
666 | } | |
667 | else if(physicalcpu == 10) { | |
668 | fill_soc(soc, "M1 Max", SOC_APPLE_M1_MAX, 5); | |
669 | } | |
670 | else { | |
671 | printBug("Found invalid physical cpu number: %d", physicalcpu); | |
672 | soc->soc_vendor = SOC_VENDOR_UNKNOWN; | |
673 | } | |
674 | } | |
675 | else { | |
676 | printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily); | |
677 | soc->soc_vendor = SOC_VENDOR_UNKNOWN; | |
678 | } | |
679 | } | |
680 | else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) { | |
681 | // Check M2 version | |
682 | if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) { | |
683 | fill_soc(soc, "M2", SOC_APPLE_M2, 5); | |
684 | } | |
685 | else { | |
686 | printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily); | |
687 | soc->soc_vendor = SOC_VENDOR_UNKNOWN; | |
688 | } | |
689 | } | |
690 | else { | |
691 | printBug("Found invalid cpu_family: 0x%.8X", cpu_family); | |
692 | soc->soc_vendor = SOC_VENDOR_UNKNOWN; | |
659 | 693 | } |
660 | 694 | return soc; |
661 | 695 | } |
255 | 255 | SOC_APPLE_M1, |
256 | 256 | SOC_APPLE_M1_PRO, |
257 | 257 | SOC_APPLE_M1_MAX, |
258 | SOC_APPLE_M1_ULTRA, | |
259 | SOC_APPLE_M2, | |
258 | 260 | // ALLWINNER |
259 | 261 | SOC_ALLWINNER_A10, |
260 | 262 | SOC_ALLWINNER_A13, |
287 | 289 | else if(soc >= SOC_EXYNOS_3475 && soc <= SOC_EXYNOS_880) return SOC_VENDOR_EXYNOS; |
288 | 290 | else if(soc >= SOC_MTK_MT6893 && soc <= SOC_MTK_MT8783) return SOC_VENDOR_MEDIATEK; |
289 | 291 | else if(soc >= SOC_SNAPD_QSD8650 && soc <= SOC_SNAPD_SM8350) return SOC_VENDOR_SNAPDRAGON; |
290 | else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M1_MAX) return SOC_VENDOR_APPLE; | |
292 | else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M2) return SOC_VENDOR_APPLE; | |
291 | 293 | else if(soc >= SOC_ALLWINNER_A10 && soc <= SOC_ALLWINNER_R328) return SOC_VENDOR_ALLWINNER; |
292 | 294 | return SOC_VENDOR_UNKNOWN; |
293 | 295 | } |
3 | 3 | // From Linux kernel: arch/arm64/include/asm/cputype.h |
4 | 4 | #define MIDR_APPLE_M1_ICESTORM 0x610F0220 |
5 | 5 | #define MIDR_APPLE_M1_FIRESTORM 0x610F0230 |
6 | // The Linux kernel does not define these part numbers, so assume | |
7 | // APPLE_CPU_PART_M2_BLIZZARD=0x30 and APPLE_CPU_PART_M2_AVALANCHE=0x31 | |
8 | #define MIDR_APPLE_M2_BLIZZARD 0x610F0300 | |
9 | #define MIDR_APPLE_M2_AVALANCHE 0x610F0310 | |
10 | ||
11 | // M1 / A14 | |
6 | 12 | #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM |
7 | 13 | #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3 |
8 | 14 | #endif |
15 | // M2 / A15 | |
16 | #ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD | |
17 | #define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D | |
18 | #endif | |
19 | ||
20 | // For detecting different M1 types | |
21 | // NOTE: Could also be achieved detecting different | |
22 | // MIDR values (e.g., APPLE_CPU_PART_M1_ICESTORM_PRO) | |
9 | 23 | #ifndef CPUSUBFAMILY_ARM_HG |
10 | 24 | #define CPUSUBFAMILY_ARM_HG 2 |
11 | 25 | #endif |
32 | 32 | ISA_ARMv8_2_A, |
33 | 33 | ISA_ARMv8_3_A, |
34 | 34 | ISA_ARMv8_4_A, |
35 | ISA_ARMv8_5_A | |
35 | 36 | }; |
36 | 37 | |
37 | 38 | enum { |
94 | 95 | UARCH_THUNDER, // Apple A13 processor (little cores). |
95 | 96 | UARCH_ICESTORM, // Apple M1 processor (little cores). |
96 | 97 | UARCH_FIRESTORM, // Apple M1 processor (big cores). |
98 | UARCH_BLIZZARD, // Apple M2 processor (little cores). | |
99 | UARCH_AVALANCHE, // Apple M2 processor (big cores). | |
97 | 100 | // CAVIUM |
98 | 101 | UARCH_THUNDERX, // Cavium ThunderX |
99 | 102 | UARCH_THUNDERX2, // Cavium ThunderX2 (originally Broadcom Vulkan). |
154 | 157 | [UARCH_EXYNOS_M3] = ISA_ARMv8_A, |
155 | 158 | [UARCH_EXYNOS_M4] = ISA_ARMv8_2_A, |
156 | 159 | [UARCH_EXYNOS_M5] = ISA_ARMv8_2_A, |
157 | [UARCH_ICESTORM] = ISA_ARMv8_4_A, | |
158 | [UARCH_FIRESTORM] = ISA_ARMv8_4_A, | |
160 | [UARCH_ICESTORM] = ISA_ARMv8_5_A, // https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Support/AArch64TargetParser.def | |
161 | [UARCH_FIRESTORM] = ISA_ARMv8_5_A, | |
162 | [UARCH_BLIZZARD] = ISA_ARMv8_5_A, // Not confirmed | |
163 | [UARCH_AVALANCHE] = ISA_ARMv8_5_A, | |
159 | 164 | [UARCH_PJ4] = ISA_ARMv7_A, |
160 | 165 | [UARCH_XIAOMI] = ISA_ARMv8_A, |
161 | 166 | }; |
171 | 176 | [ISA_ARMv8_1_A] = "ARMv8.1", |
172 | 177 | [ISA_ARMv8_2_A] = "ARMv8.2", |
173 | 178 | [ISA_ARMv8_3_A] = "ARMv8.3", |
174 | [ISA_ARMv8_4_A] = "ARMv8.4" | |
179 | [ISA_ARMv8_4_A] = "ARMv8.4", | |
180 | [ISA_ARMv8_5_A] = "ARMv8.5" | |
175 | 181 | }; |
176 | 182 | |
177 | 183 | #define UARCH_START if (false) {} |
296 | 302 | |
297 | 303 | CHECK_UARCH(arch, cpu, 'a', 0x022, NA, NA, "Icestorm", UARCH_ICESTORM, CPU_VENDOR_APPLE) |
298 | 304 | CHECK_UARCH(arch, cpu, 'a', 0x023, NA, NA, "Firestorm", UARCH_FIRESTORM, CPU_VENDOR_APPLE) |
305 | CHECK_UARCH(arch, cpu, 'a', 0x030, NA, NA, "Blizzard", UARCH_BLIZZARD, CPU_VENDOR_APPLE) | |
306 | CHECK_UARCH(arch, cpu, 'a', 0x031, NA, NA, "Avalanche", UARCH_AVALANCHE, CPU_VENDOR_APPLE) | |
299 | 307 | |
300 | 308 | CHECK_UARCH(arch, cpu, 'V', 0x581, NA, NA, "PJ4", UARCH_PJ4, CPU_VENDOR_MARVELL) |
301 | 309 | CHECK_UARCH(arch, cpu, 'V', 0x584, NA, NA, "PJ4B-MP", UARCH_PJ4, CPU_VENDOR_MARVELL) |
165 | 165 | } |
166 | 166 | |
167 | 167 | int get_num_caches_from_files(char** paths, int num_paths) { |
168 | int SHARED_MAP_MAX_LEN = 8 + 1; | |
169 | 168 | int filelen; |
170 | 169 | char* buf; |
171 | 170 | uint32_t* shared_maps = emalloc(sizeof(uint32_t *) * num_paths); |
174 | 173 | for(int i=0; i < num_paths; i++) { |
175 | 174 | if((buf = read_file(paths[i], &filelen)) == NULL) { |
176 | 175 | printWarn("Could not open '%s'", paths[i]); |
177 | return -1; | |
178 | } | |
179 | ||
180 | if(filelen > SHARED_MAP_MAX_LEN) { | |
181 | printBug("Shared map length is %d while the max is be %d", filelen, SHARED_MAP_MAX_LEN); | |
182 | 176 | return -1; |
183 | 177 | } |
184 | 178 |
78 | 78 | UARCH_GOLDMONT_PLUS, |
79 | 79 | UARCH_TREMONT, |
80 | 80 | UARCH_LAKEMONT, |
81 | UARCH_COFFE_LAKE, | |
81 | UARCH_COFFEE_LAKE, | |
82 | 82 | UARCH_ITANIUM, |
83 | 83 | UARCH_KNIGHTS_FERRY, |
84 | 84 | UARCH_KNIGHTS_CORNER, |
224 | 224 | CHECK_UARCH(arch, 0, 6, 8, 12, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64 |
225 | 225 | CHECK_UARCH(arch, 0, 6, 8, 13, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64 |
226 | 226 | // CHECK_UARCH(arch, 0, 6, 8, 14, 9, ...) It is not possible to determine uarch only from CPUID dump (can be Kaby Lake or Amber Lake) |
227 | CHECK_UARCH(arch, 0, 6, 8, 14, 10, "Kaby Lake", UARCH_KABY_LAKE, 14) // wikichip | |
227 | CHECK_UARCH(arch, 0, 6, 8, 14, 10, "Coffee Lake", UARCH_COFFEE_LAKE, 14) // wikichip | |
228 | 228 | CHECK_UARCH(arch, 0, 6, 8, 14, 11, "Whiskey Lake", UARCH_WHISKEY_LAKE, 14) // wikichip |
229 | 229 | CHECK_UARCH(arch, 0, 6, 8, 14, 12, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip |
230 | 230 | CHECK_UARCH(arch, 0, 6, 9, 6, NA, "Tremont", UARCH_TREMONT, 10) // LX* |
233 | 233 | CHECK_UARCH(arch, 0, 6, 9, 12, NA, "Tremont", UARCH_TREMONT, 10) // LX* |
234 | 234 | CHECK_UARCH(arch, 0, 6, 9, 13, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // LX* |
235 | 235 | CHECK_UARCH(arch, 0, 6, 9, 14, 9, "Kaby Lake", UARCH_KABY_LAKE, 14) |
236 | CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFE_LAKE, 14) | |
237 | CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFE_LAKE, 14) | |
238 | CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFE_LAKE, 14) | |
239 | CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFE_LAKE, 14) | |
236 | CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFEE_LAKE, 14) | |
237 | CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFEE_LAKE, 14) | |
238 | CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFEE_LAKE, 14) | |
239 | CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFEE_LAKE, 14) | |
240 | 240 | CHECK_UARCH(arch, 0, 6, 10, 5, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip |
241 | 241 | CHECK_UARCH(arch, 0, 6, 10, 6, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // instlatx64.atw.hu (i7-10710U) |
242 | 242 | CHECK_UARCH(arch, 0, 6, 10, 7, NA, "Rocket Lake", UARCH_ROCKET_LAKE, 14) // instlatx64.atw.hu (i7-11700K) |
357 | 357 | CHECK_UARCH(arch, 8, 15, 6, 0, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, geekbench.com example |
358 | 358 | CHECK_UARCH(arch, 8, 15, 6, 8, NA, "Zen 2", UARCH_ZEN2, 7) // found on instlatx64 |
359 | 359 | CHECK_UARCH(arch, 8, 15, 7, 1, NA, "Zen 2", UARCH_ZEN2, 7) // samples from Steven Noonan and instlatx64 |
360 | CHECK_UARCH(arch, 8, 15, 9, 0, 2, "Zen 2", UARCH_ZEN2, 7) // Steam Deck (instlatx64) | |
360 | 361 | CHECK_UARCH(arch, 10, 15, 0, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64 |
361 | 362 | CHECK_UARCH(arch, 10, 15, 2, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64 |
362 | 363 | CHECK_UARCH(arch, 10, 15, 5, 0, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64 |
407 | 408 | case UARCH_ROCKET_LAKE: |
408 | 409 | case UARCH_AMBER_LAKE: |
409 | 410 | case UARCH_WHISKEY_LAKE: |
410 | case UARCH_COFFE_LAKE: | |
411 | case UARCH_COFFEE_LAKE: | |
411 | 412 | case UARCH_PALM_COVE: |
412 | 413 | |
413 | 414 | case UARCH_KNIGHTS_LANDING: |