diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
new file mode 100644
index 0000000..47137ee
--- /dev/null
+++ b/.github/workflows/go.yml
@@ -0,0 +1,57 @@
+name: Go
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+    strategy:  # every Go version below is tested on every OS below
+      matrix:
+        go-version: [1.15.x, 1.16.x, 1.17.x]
+        os: [ubuntu-latest, macos-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Set up Go
+      uses: actions/setup-go@v2
+      with:
+        go-version: ${{ matrix.go-version }}
+
+    - name: Vet
+      run: go vet ./...
+
+    - name: Test
+      run: go test ./...
+
+    - name: Test Noasm  # same tests with the "noasm" build tag set
+      run: go test -tags=noasm ./...
+      
+  build-special:  # formatting, 386 and goreleaser checks; Linux only
+    env:
+      CGO_ENABLED: 0
+    runs-on: ubuntu-latest
+    steps:
+    - name: Set up Go
+      uses: actions/setup-go@v2
+      with:
+        go-version: 1.16.x
+
+    - name: Checkout code
+      uses: actions/checkout@v2
+
+    - name: fmt  # fails when gofmt -d prints any diff
+      run: diff <(gofmt -d .) <(printf "")
+
+    - name: Test 386
+      run: GOOS=linux GOARCH=386 go test -short ./...
+
+    - name: goreleaser deprecation
+      run: curl -sfL https://git.io/goreleaser | VERSION=v0.162.0 sh -s -- check
+
+    - name: goreleaser snapshot  # -f: curl fails on HTTP errors instead of piping an error page to sh
+      run: curl -sfL https://git.io/goreleaser | VERSION=v0.162.0 sh -s -- --snapshot --skip-publish --rm-dist
+       
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..91b70c3
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,30 @@
+name: goreleaser
+
+on:
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  goreleaser:  # runs on pushes of tags matching 'v*'
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0  # 0 fetches the full history instead of a shallow clone
+      -
+        name: Set up Go
+        uses: actions/setup-go@v2
+        with:
+          go-version: '1.16'  # quoted so YAML never reads a version as a float (1.20 -> 1.2)
+      -
+        name: Run GoReleaser
+        uses: goreleaser/goreleaser-action@v2
+        with:
+          version: 0.162.0
+          args: release --rm-dist
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CGO_ENABLED: 0
diff --git a/.goreleaser.yml b/.goreleaser.yml
new file mode 100644
index 0000000..944cc00
--- /dev/null
+++ b/.goreleaser.yml
@@ -0,0 +1,74 @@
+# This is an example goreleaser.yaml file with some sane defaults.
+# Make sure to check the documentation at http://goreleaser.com
+
+builds:  # one build entry: the cpuid command-line tool
+  -
+    id: "cpuid"
+    binary: cpuid
+    main: ./cmd/cpuid/main.go  # entry point of the command
+    env:
+      - CGO_ENABLED=0  # build with cgo disabled
+    flags:
+      - -ldflags=-s -w  # linker flags: omit symbol table (-s) and DWARF debug info (-w)
+    goos:
+      - aix  # NOTE(review): Go's aix port targets ppc64, which is not in goarch below - confirm a build is produced
+      - linux
+      - freebsd
+      - netbsd
+      - windows
+      - darwin
+    goarch:
+      - 386
+      - amd64
+      - arm64
+    goarm:  # NOTE(review): goarch has no "arm" entry, so this looks unused - confirm
+      - 7
+
+archives:  # release archive layout
+  -
+    id: cpuid
+    name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"  # presumably rendered after the replacements below - confirm
+    replacements:  # display names substituted for GOOS/GOARCH values
+      aix: AIX
+      darwin: OSX
+      linux: Linux
+      windows: Windows
+      386: i386
+      amd64: x86_64
+      freebsd: FreeBSD
+      netbsd: NetBSD
+    format_overrides:  # per-OS exceptions to the default archive format
+      - goos: windows
+        format: zip
+    files:  # extra files bundled into each archive
+      - LICENSE
+checksum:
+  name_template: 'checksums.txt'  # file name of the checksum list
+snapshot:
+  name_template: "{{ .Tag }}-next"  # version label used for snapshot builds
+changelog:
+  sort: asc
+  filters:
+    exclude:  # commit messages matching these regexes are left out of the changelog
+    - '^doc:'
+    - '^docs:'
+    - '^test:'
+    - '^tests:'
+    - '^Update\sREADME.md'
+
+nfpms:  # package metadata for the formats listed under "formats"
+  -
+    file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    vendor: Klaus Post
+    homepage: https://github.com/klauspost/cpuid
+    maintainer: Klaus Post <klauspost@gmail.com>
+    description: CPUID Tool
+    license: BSD 3-Clause
+    formats:  # package formats to produce
+      - deb
+      - rpm
+    replacements:  # display names substituted for GOOS/GOARCH values
+      darwin: Darwin  # NOTE(review): deb/rpm are Linux package formats; non-linux entries may be unnecessary - confirm
+      linux: Linux
+      freebsd: FreeBSD
+      amd64: x86_64
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 77d975f..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-language: go
-
-os:
-  - linux
-  - osx
-  - windows
-
-arch:
-  - amd64
-  - arm64
-
-go:
-  - 1.12.x
-  - 1.13.x
-  - 1.14.x
-  - master
-
-script:
-  - go vet ./...
-  - go test -race ./...
-  - go test -tags=noasm ./...
-
-stages:
-  - gofmt
-  - test
-
-matrix:
-  allow_failures:
-    - go: 'master'
-  fast_finish: true
-  include:
-    - stage: gofmt
-      go: 1.14.x
-      os: linux
-      arch: amd64
-      script:
-        - diff <(gofmt -d .) <(printf "")
-        - diff <(gofmt -d ./private) <(printf "")
-        - go install github.com/klauspost/asmfmt/cmd/asmfmt
-        - diff <(asmfmt -d .) <(printf "")
-    - stage: i386
-      go: 1.14.x
-      os: linux
-      arch: amd64
-      script:
-        - GOOS=linux GOARCH=386 go test .
diff --git a/README.md b/README.md
index 38d4a8b..bc2f98f 100644
--- a/README.md
+++ b/README.md
@@ -8,159 +8,48 @@ You can access the CPU information by accessing the shared CPU variable of the c
 
 Package home: https://github.com/klauspost/cpuid
 
-[![GoDoc][1]][2] [![Build Status][3]][4]
+[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
+[![Build Status][3]][4]
 
-[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
-[2]: https://godoc.org/github.com/klauspost/cpuid
 [3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
 [4]: https://travis-ci.org/klauspost/cpuid
 
-# features
-
-## x86 CPU Instructions
-*  **CMOV** (i686 CMOV)
-*  **NX** (NX (No-Execute) bit)
-*  **AMD3DNOW** (AMD 3DNOW)
-*  **AMD3DNOWEXT** (AMD 3DNowExt)
-*  **MMX** (standard MMX)
-*  **MMXEXT** (SSE integer functions or AMD MMX ext)
-*  **SSE** (SSE functions)
-*  **SSE2** (P4 SSE functions)
-*  **SSE3** (Prescott SSE3 functions)
-*  **SSSE3** (Conroe SSSE3 functions)
-*  **SSE4** (Penryn SSE4.1 functions)
-*  **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
-*  **SSE42** (Nehalem SSE4.2 functions)
-*  **AVX** (AVX functions)
-*  **AVX2** (AVX2 functions)
-*  **FMA3** (Intel FMA 3)
-*  **FMA4** (Bulldozer FMA4 functions)
-*  **XOP** (Bulldozer XOP functions)
-*  **F16C** (Half-precision floating-point conversion)
-*  **BMI1** (Bit Manipulation Instruction Set 1)
-*  **BMI2** (Bit Manipulation Instruction Set 2)
-*  **TBM** (AMD Trailing Bit Manipulation)
-*  **LZCNT** (LZCNT instruction)
-*  **POPCNT** (POPCNT instruction)
-*  **AESNI** (Advanced Encryption Standard New Instructions)
-*  **CLMUL** (Carry-less Multiplication)
-*  **HTT** (Hyperthreading (enabled))
-*  **HLE** (Hardware Lock Elision)
-*  **RTM** (Restricted Transactional Memory)
-*  **RDRAND** (RDRAND instruction is available)
-*  **RDSEED** (RDSEED instruction is available)
-*  **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
-*  **SHA** (Intel SHA Extensions)
-*  **AVX512F** (AVX-512 Foundation)
-*  **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
-*  **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
-*  **AVX512PF** (AVX-512 Prefetch Instructions)
-*  **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
-*  **AVX512CD** (AVX-512 Conflict Detection Instructions)
-*  **AVX512BW** (AVX-512 Byte and Word Instructions)
-*  **AVX512VL** (AVX-512 Vector Length Extensions)
-*  **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
-*  **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2)
-*  **AVX512VNNI** (AVX-512 Vector Neural Network Instructions)
-*  **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword)
-*  **GFNI** (Galois Field New Instructions)
-*  **VAES** (Vector AES)
-*  **AVX512BITALG** (AVX-512 Bit Algorithms)
-*  **VPCLMULQDQ** (Carry-Less Multiplication Quadword)
-*  **AVX512BF16** (AVX-512 BFLOAT16 Instructions)
-*  **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q)
-*  **MPX** (Intel MPX (Memory Protection Extensions))
-*  **ERMS** (Enhanced REP MOVSB/STOSB)
-*  **RDTSCP** (RDTSCP Instruction)
-*  **CX16** (CMPXCHG16B Instruction)
-*  **SGX** (Software Guard Extensions, with activation details)
-*  **VMX** (Virtual Machine Extensions)
-
-## Performance
-*  **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
-*  **SSE2SLOW** (SSE2 is supported, but usually not faster)
-*  **SSE3SLOW** (SSE3 is supported, but usually not faster)
-*  **ATOM** (Atom processor, some SSSE3 instructions are slower)
-*  **Cache line** (Probable size of a cache line).
-*  **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
-
-## ARM CPU features
-
-# ARM FEATURE DETECTION DISABLED!
-
-See [#52](https://github.com/klauspost/cpuid/issues/52).
- 
-Currently only `arm64` platforms are implemented. 
-
-*  **FP**  Single-precision and double-precision floating point
-*  **ASIMD**  Advanced SIMD
-*  **EVTSTRM**  Generic timer
-*  **AES**  AES instructions
-*  **PMULL**  Polynomial Multiply instructions (PMULL/PMULL2)
-*  **SHA1**  SHA-1 instructions (SHA1C, etc)
-*  **SHA2**      SHA-2 instructions (SHA256H, etc)
-*  **CRC32**   CRC32/CRC32C instructions
-*  **ATOMICS**   Large System Extensions (LSE)
-*  **FPHP** Half-precision floating point
-*  **ASIMDHP**  Advanced SIMD half-precision floating point
-*  **ARMCPUID**  Some CPU ID registers readable at user-level
-*  **ASIMDRDM**  Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
-*  **JSCVT** Javascript-style double->int convert (FJCVTZS)
-*  **FCMA**  Floating point complex number addition and multiplication
-*  **LRCPC**  Weaker release consistency (LDAPR, etc)
-*  **DCPOP**  Data cache clean to Point of Persistence (DC CVAP)
-*  **SHA3**  SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
-*  **SM3** SM3 instructions
-*  **SM4**  SM4 instructions
-*  **ASIMDDP**  SIMD Dot Product
-*  **SHA512**  SHA512 instructions
-*  **SVE** Scalable Vector Extension
-*  **GPA**  Generic Pointer Authentication
-
-## Cpu Vendor/VM
-* **Intel**
-* **AMD**
-* **VIA**
-* **Transmeta**
-* **NSC**
-* **KVM**  (Kernel-based Virtual Machine)
-* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
-* **VMware**
-* **XenHVM**
-* **Bhyve**
-* **Hygon**
-
-# installing
-
-```go get github.com/klauspost/cpuid```
-
-# example
+## installing
+
+`go get -u github.com/klauspost/cpuid/v2` using modules. 
+
+Drop the `/v2` suffix if you are not using modules.
+
+## example
 
 ```Go
 package main
 
 import (
 	"fmt"
-	"github.com/klauspost/cpuid"
+	"strings"
+
+	. "github.com/klauspost/cpuid/v2"
 )
 
 func main() {
 	// Print basic CPU information:
-	fmt.Println("Name:", cpuid.CPU.BrandName)
-	fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
-	fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
-	fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
-	fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
-	fmt.Println("Features:", cpuid.CPU.Features)
-	fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
-	fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
-	fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
-	fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
-	fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
-
-	// Test if we have a specific feature:
-	if cpuid.CPU.SSE() {
-		fmt.Println("We have Streaming SIMD Extensions")
+	fmt.Println("Name:", CPU.BrandName)
+	fmt.Println("PhysicalCores:", CPU.PhysicalCores)
+	fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
+	fmt.Println("LogicalCores:", CPU.LogicalCores)
+	fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
+	fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
+	fmt.Println("Cacheline bytes:", CPU.CacheLine)
+	fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
+	fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
+	fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
+	fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
+	fmt.Println("Frequency", CPU.Hz, "hz")
+
+	// Test if we have these specific features:
+	if CPU.Supports(SSE, SSE2) {
+		fmt.Println("We have Streaming SIMD 2 Extensions")
 	}
 }
 ```
@@ -168,23 +57,80 @@ func main() {
 Sample output:
 ```
 >go run main.go
-Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
-PhysicalCores: 2
+Name: AMD Ryzen 9 3950X 16-Core Processor
+PhysicalCores: 16
 ThreadsPerCore: 2
-LogicalCores: 4
-Family 6 Model: 42
-Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
+LogicalCores: 32
+Family 23 Model: 113 Vendor ID: AMD
+Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
 Cacheline bytes: 64
-We have Streaming SIMD Extensions
+L1 Data Cache: 32768 bytes
+L1 Instruction Cache: 32768 bytes
+L2 Cache: 524288 bytes
+L3 Cache: 16777216 bytes
+Frequency 0 hz
+We have Streaming SIMD 2 Extensions
 ```
 
-# private package
+# usage
+
+The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
+A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.  
+
+Note that for some cpu/os combinations some features will not be detected.
+`amd64` has rather good support and should work reliably on all platforms.
+
+Note that hypervisors may not pass through all CPU features.
+
+## arm64 feature detection
+
+Not all operating systems provide ARM features directly 
+and there is no safe way to do so for the rest.
+
+Currently `arm64/linux` and `arm64/freebsd` should be quite reliable. 
+`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
+
+`DetectARM()` can be used if you are able to control your deployment.
+It will detect CPU features, but may crash if the OS doesn't intercept the calls.
+A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
+ 
+Note that currently only features are detected on ARM, 
+no additional information is currently available. 
+
+## flags
+
+It is possible to add flags that affect CPU detection.
 
-In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
+For this, the `Flags()` function is provided.
 
-For this purpose all exports are removed, and functions and constants are lowercased.
+`Flags()` must be called *before* `flag.Parse()`, and `Detect()` must be called *after* the flags have been parsed.
 
-This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
+This means that any detection used in `init()` functions will not contain these flags.
+
+Example:
+
+```Go
+package main
+
+import (
+	"flag"
+	"fmt"
+
+	"github.com/klauspost/cpuid/v2"
+)
+
+func main() {
+	// Register the cpuid flags, parse the command line, then re-run detection so the flags apply.
+	cpuid.Flags()
+	flag.Parse()
+	cpuid.Detect()
+
+	// Test if we have these specific features:
+	if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
+		fmt.Println("We have Streaming SIMD 2 Extensions")
+	}
+}
+```
 
 # license
 
diff --git a/cmd/cpuid/main.go b/cmd/cpuid/main.go
new file mode 100644
index 0000000..5869a4b
--- /dev/null
+++ b/cmd/cpuid/main.go
@@ -0,0 +1,82 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+// Package cpuid provides information about the CPU running the current program.
+//
+// CPU features are detected on startup, and kept for fast access through the life of the application.
+// Currently x86 / x64 (AMD64) as well as arm64 is supported.
+//
+// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+//
+// Package home: https://github.com/klauspost/cpuid
+package main
+
+import (
+	"encoding/json"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"strings"
+
+	"github.com/klauspost/cpuid/v2"
+)
+
+var js = flag.Bool("json", false, "Output as JSON")
+var level = flag.Int("check-level", 0, "Check microarchitecture level. Exit code will be 0 if supported")
+
+// main prints CPU details (as JSON with -json) or, with -check-level, tests a microarchitecture level.
+func main() {
+	flag.Parse()
+	if *level != 0 { // 0 is the flag default, meaning no level check was requested
+		if *level < 1 || *level > 4 {
+			log.Fatalln("Supply CPU level 1-4 to test as argument")
+		}
+		log.Println(cpuid.CPU.BrandName)
+		if cpuid.CPU.X64Level() < *level {
+			log.Fatalf("Microarchitecture level %d not supported. Max level is %d.", *level, cpuid.CPU.X64Level()) // exits with status 1
+		}
+		log.Printf("Microarchitecture level %d is supported. Max level is %d.", *level, cpuid.CPU.X64Level())
+		os.Exit(0)
+	}
+	if *js {
+		info := struct { // CPUInfo plus the feature names and level as extra JSON fields
+			cpuid.CPUInfo
+			Features []string
+			X64Level int
+		}{
+			CPUInfo:  cpuid.CPU,
+			Features: cpuid.CPU.FeatureSet(),
+			X64Level: cpuid.CPU.X64Level(),
+		}
+		b, err := json.MarshalIndent(info, "", "  ")
+		if err != nil {
+			panic(err)
+		}
+		fmt.Println(string(b))
+		os.Exit(0)
+	}
+
+	fmt.Println("Name:", cpuid.CPU.BrandName)
+	fmt.Println("Vendor String:", cpuid.CPU.VendorString)
+	fmt.Println("Vendor ID:", cpuid.CPU.VendorID)
+	fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
+	fmt.Println("Threads Per Core:", cpuid.CPU.ThreadsPerCore)
+	fmt.Println("Logical Cores:", cpuid.CPU.LogicalCores)
+	fmt.Println("CPU Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
+	fmt.Println("Features:", strings.Join(cpuid.CPU.FeatureSet(), ","))
+	fmt.Println("Microarchitecture level:", cpuid.CPU.X64Level())
+	fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
+	fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1I, "bytes")
+	fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
+	fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
+	fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
+	if cpuid.CPU.Hz > 0 { // frequency lines are printed only when a non-zero value is present
+		fmt.Println("Frequency:", cpuid.CPU.Hz, "Hz")
+	}
+	if cpuid.CPU.BoostFreq > 0 {
+		fmt.Println("Boost Frequency:", cpuid.CPU.BoostFreq, "Hz")
+	}
+	if cpuid.CPU.SGX.Available {
+		fmt.Printf("SGX: %+v\n", cpuid.CPU.SGX)
+	}
+}
diff --git a/cpuid.go b/cpuid.go
index 208b3e7..3d543ce 100644
--- a/cpuid.go
+++ b/cpuid.go
@@ -11,7 +11,11 @@
 package cpuid
 
 import (
+	"flag"
+	"fmt"
 	"math"
+	"os"
+	"runtime"
 	"strings"
 )
 
@@ -22,7 +26,7 @@ import (
 type Vendor int
 
 const (
-	Other Vendor = iota
+	VendorUnknown Vendor = iota
 	Intel
 	AMD
 	VIA
@@ -36,215 +40,185 @@ const (
 	Hygon
 	SiS
 	RDC
-)
 
-const (
-	CMOV               = 1 << iota // i686 CMOV
-	NX                             // NX (No-Execute) bit
-	AMD3DNOW                       // AMD 3DNOW
-	AMD3DNOWEXT                    // AMD 3DNowExt
-	MMX                            // standard MMX
-	MMXEXT                         // SSE integer functions or AMD MMX ext
-	SSE                            // SSE functions
-	SSE2                           // P4 SSE functions
-	SSE3                           // Prescott SSE3 functions
-	SSSE3                          // Conroe SSSE3 functions
-	SSE4                           // Penryn SSE4.1 functions
-	SSE4A                          // AMD Barcelona microarchitecture SSE4a instructions
-	SSE42                          // Nehalem SSE4.2 functions
-	AVX                            // AVX functions
-	AVX2                           // AVX2 functions
-	FMA3                           // Intel FMA 3
-	FMA4                           // Bulldozer FMA4 functions
-	XOP                            // Bulldozer XOP functions
-	F16C                           // Half-precision floating-point conversion
-	BMI1                           // Bit Manipulation Instruction Set 1
-	BMI2                           // Bit Manipulation Instruction Set 2
-	TBM                            // AMD Trailing Bit Manipulation
-	LZCNT                          // LZCNT instruction
-	POPCNT                         // POPCNT instruction
-	AESNI                          // Advanced Encryption Standard New Instructions
-	CLMUL                          // Carry-less Multiplication
-	HTT                            // Hyperthreading (enabled)
-	HLE                            // Hardware Lock Elision
-	RTM                            // Restricted Transactional Memory
-	RDRAND                         // RDRAND instruction is available
-	RDSEED                         // RDSEED instruction is available
-	ADX                            // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-	SHA                            // Intel SHA Extensions
-	AVX512F                        // AVX-512 Foundation
-	AVX512DQ                       // AVX-512 Doubleword and Quadword Instructions
-	AVX512IFMA                     // AVX-512 Integer Fused Multiply-Add Instructions
-	AVX512PF                       // AVX-512 Prefetch Instructions
-	AVX512ER                       // AVX-512 Exponential and Reciprocal Instructions
-	AVX512CD                       // AVX-512 Conflict Detection Instructions
-	AVX512BW                       // AVX-512 Byte and Word Instructions
-	AVX512VL                       // AVX-512 Vector Length Extensions
-	AVX512VBMI                     // AVX-512 Vector Bit Manipulation Instructions
-	AVX512VBMI2                    // AVX-512 Vector Bit Manipulation Instructions, Version 2
-	AVX512VNNI                     // AVX-512 Vector Neural Network Instructions
-	AVX512VPOPCNTDQ                // AVX-512 Vector Population Count Doubleword and Quadword
-	GFNI                           // Galois Field New Instructions
-	VAES                           // Vector AES
-	AVX512BITALG                   // AVX-512 Bit Algorithms
-	VPCLMULQDQ                     // Carry-Less Multiplication Quadword
-	AVX512BF16                     // AVX-512 BFLOAT16 Instructions
-	AVX512VP2INTERSECT             // AVX-512 Intersect for D/Q
-	MPX                            // Intel MPX (Memory Protection Extensions)
-	ERMS                           // Enhanced REP MOVSB/STOSB
-	RDTSCP                         // RDTSCP Instruction
-	CX16                           // CMPXCHG16B Instruction
-	SGX                            // Software Guard Extensions
-	SGXLC                          // Software Guard Extensions Launch Control
-	IBPB                           // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
-	STIBP                          // Single Thread Indirect Branch Predictors
-	VMX                            // Virtual Machine Extensions
-
-	// Performance indicators
-	SSE2SLOW // SSE2 is supported, but usually not faster
-	SSE3SLOW // SSE3 is supported, but usually not faster
-	ATOM     // Atom processor, some SSSE3 instructions are slower
+	Ampere
+	ARM
+	Broadcom
+	Cavium
+	DEC
+	Fujitsu
+	Infineon
+	Motorola
+	NVIDIA
+	AMCC
+	Qualcomm
+	Marvell
+
+	lastVendor
 )
 
-var flagNames = map[Flags]string{
-	CMOV:               "CMOV",               // i686 CMOV
-	NX:                 "NX",                 // NX (No-Execute) bit
-	AMD3DNOW:           "AMD3DNOW",           // AMD 3DNOW
-	AMD3DNOWEXT:        "AMD3DNOWEXT",        // AMD 3DNowExt
-	MMX:                "MMX",                // Standard MMX
-	MMXEXT:             "MMXEXT",             // SSE integer functions or AMD MMX ext
-	SSE:                "SSE",                // SSE functions
-	SSE2:               "SSE2",               // P4 SSE2 functions
-	SSE3:               "SSE3",               // Prescott SSE3 functions
-	SSSE3:              "SSSE3",              // Conroe SSSE3 functions
-	SSE4:               "SSE4.1",             // Penryn SSE4.1 functions
-	SSE4A:              "SSE4A",              // AMD Barcelona microarchitecture SSE4a instructions
-	SSE42:              "SSE4.2",             // Nehalem SSE4.2 functions
-	AVX:                "AVX",                // AVX functions
-	AVX2:               "AVX2",               // AVX functions
-	FMA3:               "FMA3",               // Intel FMA 3
-	FMA4:               "FMA4",               // Bulldozer FMA4 functions
-	XOP:                "XOP",                // Bulldozer XOP functions
-	F16C:               "F16C",               // Half-precision floating-point conversion
-	BMI1:               "BMI1",               // Bit Manipulation Instruction Set 1
-	BMI2:               "BMI2",               // Bit Manipulation Instruction Set 2
-	TBM:                "TBM",                // AMD Trailing Bit Manipulation
-	LZCNT:              "LZCNT",              // LZCNT instruction
-	POPCNT:             "POPCNT",             // POPCNT instruction
-	AESNI:              "AESNI",              // Advanced Encryption Standard New Instructions
-	CLMUL:              "CLMUL",              // Carry-less Multiplication
-	HTT:                "HTT",                // Hyperthreading (enabled)
-	HLE:                "HLE",                // Hardware Lock Elision
-	RTM:                "RTM",                // Restricted Transactional Memory
-	RDRAND:             "RDRAND",             // RDRAND instruction is available
-	RDSEED:             "RDSEED",             // RDSEED instruction is available
-	ADX:                "ADX",                // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-	SHA:                "SHA",                // Intel SHA Extensions
-	AVX512F:            "AVX512F",            // AVX-512 Foundation
-	AVX512DQ:           "AVX512DQ",           // AVX-512 Doubleword and Quadword Instructions
-	AVX512IFMA:         "AVX512IFMA",         // AVX-512 Integer Fused Multiply-Add Instructions
-	AVX512PF:           "AVX512PF",           // AVX-512 Prefetch Instructions
-	AVX512ER:           "AVX512ER",           // AVX-512 Exponential and Reciprocal Instructions
-	AVX512CD:           "AVX512CD",           // AVX-512 Conflict Detection Instructions
-	AVX512BW:           "AVX512BW",           // AVX-512 Byte and Word Instructions
-	AVX512VL:           "AVX512VL",           // AVX-512 Vector Length Extensions
-	AVX512VBMI:         "AVX512VBMI",         // AVX-512 Vector Bit Manipulation Instructions
-	AVX512VBMI2:        "AVX512VBMI2",        // AVX-512 Vector Bit Manipulation Instructions, Version 2
-	AVX512VNNI:         "AVX512VNNI",         // AVX-512 Vector Neural Network Instructions
-	AVX512VPOPCNTDQ:    "AVX512VPOPCNTDQ",    // AVX-512 Vector Population Count Doubleword and Quadword
-	GFNI:               "GFNI",               // Galois Field New Instructions
-	VAES:               "VAES",               // Vector AES
-	AVX512BITALG:       "AVX512BITALG",       // AVX-512 Bit Algorithms
-	VPCLMULQDQ:         "VPCLMULQDQ",         // Carry-Less Multiplication Quadword
-	AVX512BF16:         "AVX512BF16",         // AVX-512 BFLOAT16 Instruction
-	AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
-	MPX:                "MPX",                // Intel MPX (Memory Protection Extensions)
-	ERMS:               "ERMS",               // Enhanced REP MOVSB/STOSB
-	RDTSCP:             "RDTSCP",             // RDTSCP Instruction
-	CX16:               "CX16",               // CMPXCHG16B Instruction
-	SGX:                "SGX",                // Software Guard Extensions
-	SGXLC:              "SGXLC",              // Software Guard Extensions Launch Control
-	IBPB:               "IBPB",               // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
-	STIBP:              "STIBP",              // Single Thread Indirect Branch Predictors
-	VMX:                "VMX",                // Virtual Machine Extensions
-
-	// Performance indicators
-	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
-	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
-	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
+//go:generate stringer -type=FeatureID,Vendor
 
-}
+// FeatureID is the ID of a specific cpu feature.
+type FeatureID int
 
-/* all special features for arm64 should be defined here */
 const (
-	/* extension instructions */
-	FP ArmFlags = 1 << iota
-	ASIMD
-	EVTSTRM
-	AES
-	PMULL
-	SHA1
-	SHA2
-	CRC32
-	ATOMICS
-	FPHP
-	ASIMDHP
-	ARMCPUID
-	ASIMDRDM
-	JSCVT
-	FCMA
-	LRCPC
-	DCPOP
-	SHA3
-	SM3
-	SM4
-	ASIMDDP
-	SHA512
-	SVE
-	GPA
+	// Keep index -1 as unknown
+	UNKNOWN = -1
+
+	// Add features
+	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	AESNI                               // Advanced Encryption Standard New Instructions
+	AMD3DNOW                            // AMD 3DNOW
+	AMD3DNOWEXT                         // AMD 3DNowExt
+	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
+	AMXINT8                             // Tile computational operations on 8-bit integers
+	AMXTILE                             // Tile architecture
+	AVX                                 // AVX functions
+	AVX2                                // AVX2 functions
+	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
+	AVX512BITALG                        // AVX-512 Bit Algorithms
+	AVX512BW                            // AVX-512 Byte and Word Instructions
+	AVX512CD                            // AVX-512 Conflict Detection Instructions
+	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
+	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
+	AVX512F                             // AVX-512 Foundation
+	AVX512FP16                          // AVX-512 FP16 Instructions
+	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF                            // AVX-512 Prefetch Instructions
+	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
+	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
+	AVX512VL                            // AVX-512 Vector Length Extensions
+	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
+	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
+	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
+	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one.
+	BMI1                                // Bit Manipulation Instruction Set 1
+	BMI2                                // Bit Manipulation Instruction Set 2
+	CETIBT                              // Intel CET Indirect Branch Tracking
+	CETSS                               // Intel CET Shadow Stack
+	CLDEMOTE                            // Cache Line Demote
+	CLMUL                               // Carry-less Multiplication
+	CLZERO                              // CLZERO instruction supported
+	CMOV                                // i686 CMOV
+	CMPXCHG8                            // CMPXCHG8 instruction
+	CPBOOST                             // Core Performance Boost
+	CX16                                // CMPXCHG16B Instruction
+	ENQCMD                              // Enqueue Command
+	ERMS                                // Enhanced REP MOVSB/STOSB
+	F16C                                // Half-precision floating-point conversion
+	FMA3                                // Intel FMA 3. Does not imply AVX.
+	FMA4                                // Bulldozer FMA4 functions
+	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
+	FXSROPT                             // FXSAVE/FXRSTOR optimizations
+	GFNI                                // Galois Field New Instructions
+	HLE                                 // Hardware Lock Elision
+	HTT                                 // Hyperthreading (enabled)
+	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
+	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
+	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	IBS                                 // Instruction Based Sampling (AMD)
+	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
+	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
+	IBSFFV                              // Instruction Based Sampling Feature (AMD)
+	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
+	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
+	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
+	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
+	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
+	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
+	INVLPGB                             // INVLPGB and TLBSYNC instruction supported
+	LAHF                                // LAHF/SAHF in long mode
+	LZCNT                               // LZCNT instruction
+	MCAOVERFLOW                         // MCA overflow recovery support.
+	MCOMMIT                             // MCOMMIT instruction supported
+	MMX                                 // standard MMX
+	MMXEXT                              // SSE integer functions or AMD MMX ext
+	MOVBE                               // MOVBE instruction (big-endian)
+	MOVDIR64B                           // Move 64 Bytes as Direct Store
+	MOVDIRI                             // Move Doubleword as Direct Store
+	MPX                                 // Intel MPX (Memory Protection Extensions)
+	MSRIRC                              // Instruction Retired Counter MSR available
+	NX                                  // NX (No-Execute) bit
+	OSXSAVE                             // XSAVE enabled by OS
+	POPCNT                              // POPCNT instruction
+	RDPRU                               // RDPRU instruction supported
+	RDRAND                              // RDRAND instruction is available
+	RDSEED                              // RDSEED instruction is available
+	RDTSCP                              // RDTSCP Instruction
+	RTM                                 // Restricted Transactional Memory
+	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
+	SCE                                 // SYSENTER and SYSEXIT instructions
+	SERIALIZE                           // Serialize Instruction Execution
+	SGX                                 // Software Guard Extensions
+	SGXLC                               // Software Guard Extensions Launch Control
+	SHA                                 // Intel SHA Extensions
+	SSE                                 // SSE functions
+	SSE2                                // P4 SSE functions
+	SSE3                                // Prescott SSE3 functions
+	SSE4                                // Penryn SSE4.1 functions
+	SSE42                               // Nehalem SSE4.2 functions
+	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
+	SSSE3                               // Conroe SSSE3 functions
+	STIBP                               // Single Thread Indirect Branch Predictors
+	SUCCOR                              // Software uncorrectable error containment and recovery capability.
+	TBM                                 // AMD Trailing Bit Manipulation
+	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
+	VAES                                // Vector AES
+	VMX                                 // Virtual Machine Extensions
+	VPCLMULQDQ                          // Carry-Less Multiplication Quadword
+	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
+	WBNOINVD                            // Write Back and Do Not Invalidate Cache
+	X87                                 // FPU
+	XOP                                 // Bulldozer XOP functions
+	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
+
+	// ARM features:
+	AESARM   // AES instructions
+	ARMCPUID // Some CPU ID registers readable at user-level
+	ASIMD    // Advanced SIMD
+	ASIMDDP  // SIMD Dot Product
+	ASIMDHP  // Advanced SIMD half-precision floating point
+	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
+	ATOMICS  // Large System Extensions (LSE)
+	CRC32    // CRC32/CRC32C instructions
+	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
+	EVTSTRM  // Generic timer
+	FCMA     // Floating point complex number addition and multiplication
+	FP       // Single-precision and double-precision floating point
+	FPHP     // Half-precision floating point
+	GPA      // Generic Pointer Authentication
+	JSCVT    // Javascript-style double->int convert (FJCVTZS)
+	LRCPC    // Weaker release consistency (LDAPR, etc)
+	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
+	SHA1     // SHA-1 instructions (SHA1C, etc)
+	SHA2     // SHA-2 instructions (SHA256H, etc)
+	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
+	SHA512   // SHA512 instructions
+	SM3      // SM3 instructions
+	SM4      // SM4 instructions
+	SVE      // Scalable Vector Extension
+
+	// Keep it last. It automatically defines the size of []flagSet
+	lastID
+
+	firstID FeatureID = UNKNOWN + 1
 )
 
-var flagNamesArm = map[ArmFlags]string{
-	FP:       "FP",       // Single-precision and double-precision floating point
-	ASIMD:    "ASIMD",    // Advanced SIMD
-	EVTSTRM:  "EVTSTRM",  // Generic timer
-	AES:      "AES",      // AES instructions
-	PMULL:    "PMULL",    // Polynomial Multiply instructions (PMULL/PMULL2)
-	SHA1:     "SHA1",     // SHA-1 instructions (SHA1C, etc)
-	SHA2:     "SHA2",     // SHA-2 instructions (SHA256H, etc)
-	CRC32:    "CRC32",    // CRC32/CRC32C instructions
-	ATOMICS:  "ATOMICS",  // Large System Extensions (LSE)
-	FPHP:     "FPHP",     // Half-precision floating point
-	ASIMDHP:  "ASIMDHP",  // Advanced SIMD half-precision floating point
-	ARMCPUID: "CPUID",    // Some CPU ID registers readable at user-level
-	ASIMDRDM: "ASIMDRDM", // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
-	JSCVT:    "JSCVT",    // Javascript-style double->int convert (FJCVTZS)
-	FCMA:     "FCMA",     // Floatin point complex number addition and multiplication
-	LRCPC:    "LRCPC",    // Weaker release consistency (LDAPR, etc)
-	DCPOP:    "DCPOP",    // Data cache clean to Point of Persistence (DC CVAP)
-	SHA3:     "SHA3",     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
-	SM3:      "SM3",      // SM3 instructions
-	SM4:      "SM4",      // SM4 instructions
-	ASIMDDP:  "ASIMDDP",  // SIMD Dot Product
-	SHA512:   "SHA512",   // SHA512 instructions
-	SVE:      "SVE",      // Scalable Vector Extension
-	GPA:      "GPA",      // Generic Pointer Authentication
-}
-
 // CPUInfo contains information about the detected system CPU.
 type CPUInfo struct {
-	BrandName      string   // Brand name reported by the CPU
-	VendorID       Vendor   // Comparable CPU vendor ID
-	VendorString   string   // Raw vendor string.
-	Features       Flags    // Features of the CPU (x64)
-	Arm            ArmFlags // Features of the CPU (arm)
-	PhysicalCores  int      // Number of physical processor cores in your CPU. Will be 0 if undetectable.
-	ThreadsPerCore int      // Number of threads per physical core. Will be 1 if undetectable.
-	LogicalCores   int      // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
-	Family         int      // CPU family number
-	Model          int      // CPU model number
-	CacheLine      int      // Cache line size in bytes. Will be 0 if undetectable.
-	Hz             int64    // Clock speed, if known
+	BrandName      string  // Brand name reported by the CPU
+	VendorID       Vendor  // Comparable CPU vendor ID
+	VendorString   string  // Raw vendor string.
+	featureSet     flagSet // Features of the CPU
+	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
+	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
+	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
+	Family         int     // CPU family number
+	Model          int     // CPU model number
+	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
+	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
+	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
 	Cache          struct {
 		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
 		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
@@ -260,6 +234,7 @@ var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
 var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
 var xgetbv func(index uint32) (eax, edx uint32)
 var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
+var darwinHasAVX512 = func() bool { return false }
 
 // CPU contains information about the CPU as detected on startup,
 // or when Detect last was called.
@@ -286,352 +261,125 @@ func Detect() {
 	CPU.Cache.L1D = -1
 	CPU.Cache.L2 = -1
 	CPU.Cache.L3 = -1
-	addInfo(&CPU)
-}
-
-// Generated here: http://play.golang.org/p/BxFH2Gdc0G
-
-// Cmov indicates support of CMOV instructions
-func (c CPUInfo) Cmov() bool {
-	return c.Features&CMOV != 0
-}
-
-// Amd3dnow indicates support of AMD 3DNOW! instructions
-func (c CPUInfo) Amd3dnow() bool {
-	return c.Features&AMD3DNOW != 0
-}
-
-// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
-func (c CPUInfo) Amd3dnowExt() bool {
-	return c.Features&AMD3DNOWEXT != 0
-}
-
-// VMX indicates support of VMX
-func (c CPUInfo) VMX() bool {
-	return c.Features&VMX != 0
-}
-
-// MMX indicates support of MMX instructions
-func (c CPUInfo) MMX() bool {
-	return c.Features&MMX != 0
-}
-
-// MMXExt indicates support of MMXEXT instructions
-// (SSE integer functions or AMD MMX ext)
-func (c CPUInfo) MMXExt() bool {
-	return c.Features&MMXEXT != 0
-}
-
-// SSE indicates support of SSE instructions
-func (c CPUInfo) SSE() bool {
-	return c.Features&SSE != 0
-}
-
-// SSE2 indicates support of SSE 2 instructions
-func (c CPUInfo) SSE2() bool {
-	return c.Features&SSE2 != 0
-}
-
-// SSE3 indicates support of SSE 3 instructions
-func (c CPUInfo) SSE3() bool {
-	return c.Features&SSE3 != 0
-}
-
-// SSSE3 indicates support of SSSE 3 instructions
-func (c CPUInfo) SSSE3() bool {
-	return c.Features&SSSE3 != 0
-}
-
-// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
-func (c CPUInfo) SSE4() bool {
-	return c.Features&SSE4 != 0
-}
-
-// SSE42 indicates support of SSE4.2 instructions
-func (c CPUInfo) SSE42() bool {
-	return c.Features&SSE42 != 0
-}
-
-// AVX indicates support of AVX instructions
-// and operating system support of AVX instructions
-func (c CPUInfo) AVX() bool {
-	return c.Features&AVX != 0
-}
-
-// AVX2 indicates support of AVX2 instructions
-func (c CPUInfo) AVX2() bool {
-	return c.Features&AVX2 != 0
-}
-
-// FMA3 indicates support of FMA3 instructions
-func (c CPUInfo) FMA3() bool {
-	return c.Features&FMA3 != 0
-}
-
-// FMA4 indicates support of FMA4 instructions
-func (c CPUInfo) FMA4() bool {
-	return c.Features&FMA4 != 0
-}
-
-// XOP indicates support of XOP instructions
-func (c CPUInfo) XOP() bool {
-	return c.Features&XOP != 0
-}
-
-// F16C indicates support of F16C instructions
-func (c CPUInfo) F16C() bool {
-	return c.Features&F16C != 0
-}
-
-// BMI1 indicates support of BMI1 instructions
-func (c CPUInfo) BMI1() bool {
-	return c.Features&BMI1 != 0
-}
-
-// BMI2 indicates support of BMI2 instructions
-func (c CPUInfo) BMI2() bool {
-	return c.Features&BMI2 != 0
-}
-
-// TBM indicates support of TBM instructions
-// (AMD Trailing Bit Manipulation)
-func (c CPUInfo) TBM() bool {
-	return c.Features&TBM != 0
-}
-
-// Lzcnt indicates support of LZCNT instruction
-func (c CPUInfo) Lzcnt() bool {
-	return c.Features&LZCNT != 0
-}
-
-// Popcnt indicates support of POPCNT instruction
-func (c CPUInfo) Popcnt() bool {
-	return c.Features&POPCNT != 0
-}
-
-// HTT indicates the processor has Hyperthreading enabled
-func (c CPUInfo) HTT() bool {
-	return c.Features&HTT != 0
-}
-
-// SSE2Slow indicates that SSE2 may be slow on this processor
-func (c CPUInfo) SSE2Slow() bool {
-	return c.Features&SSE2SLOW != 0
-}
-
-// SSE3Slow indicates that SSE3 may be slow on this processor
-func (c CPUInfo) SSE3Slow() bool {
-	return c.Features&SSE3SLOW != 0
-}
-
-// AesNi indicates support of AES-NI instructions
-// (Advanced Encryption Standard New Instructions)
-func (c CPUInfo) AesNi() bool {
-	return c.Features&AESNI != 0
-}
-
-// Clmul indicates support of CLMUL instructions
-// (Carry-less Multiplication)
-func (c CPUInfo) Clmul() bool {
-	return c.Features&CLMUL != 0
-}
-
-// NX indicates support of NX (No-Execute) bit
-func (c CPUInfo) NX() bool {
-	return c.Features&NX != 0
-}
-
-// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
-func (c CPUInfo) SSE4A() bool {
-	return c.Features&SSE4A != 0
-}
-
-// HLE indicates support of Hardware Lock Elision
-func (c CPUInfo) HLE() bool {
-	return c.Features&HLE != 0
-}
-
-// RTM indicates support of Restricted Transactional Memory
-func (c CPUInfo) RTM() bool {
-	return c.Features&RTM != 0
-}
-
-// Rdrand indicates support of RDRAND instruction is available
-func (c CPUInfo) Rdrand() bool {
-	return c.Features&RDRAND != 0
-}
-
-// Rdseed indicates support of RDSEED instruction is available
-func (c CPUInfo) Rdseed() bool {
-	return c.Features&RDSEED != 0
-}
-
-// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-func (c CPUInfo) ADX() bool {
-	return c.Features&ADX != 0
-}
-
-// SHA indicates support of Intel SHA Extensions
-func (c CPUInfo) SHA() bool {
-	return c.Features&SHA != 0
-}
-
-// AVX512F indicates support of AVX-512 Foundation
-func (c CPUInfo) AVX512F() bool {
-	return c.Features&AVX512F != 0
-}
-
-// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
-func (c CPUInfo) AVX512DQ() bool {
-	return c.Features&AVX512DQ != 0
-}
-
-// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
-func (c CPUInfo) AVX512IFMA() bool {
-	return c.Features&AVX512IFMA != 0
-}
-
-// AVX512PF indicates support of AVX-512 Prefetch Instructions
-func (c CPUInfo) AVX512PF() bool {
-	return c.Features&AVX512PF != 0
-}
-
-// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
-func (c CPUInfo) AVX512ER() bool {
-	return c.Features&AVX512ER != 0
-}
-
-// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
-func (c CPUInfo) AVX512CD() bool {
-	return c.Features&AVX512CD != 0
-}
-
-// AVX512BW indicates support of AVX-512 Byte and Word Instructions
-func (c CPUInfo) AVX512BW() bool {
-	return c.Features&AVX512BW != 0
-}
-
-// AVX512VL indicates support of AVX-512 Vector Length Extensions
-func (c CPUInfo) AVX512VL() bool {
-	return c.Features&AVX512VL != 0
-}
-
-// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
-func (c CPUInfo) AVX512VBMI() bool {
-	return c.Features&AVX512VBMI != 0
-}
-
-// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
-func (c CPUInfo) AVX512VBMI2() bool {
-	return c.Features&AVX512VBMI2 != 0
-}
-
-// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
-func (c CPUInfo) AVX512VNNI() bool {
-	return c.Features&AVX512VNNI != 0
-}
-
-// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
-func (c CPUInfo) AVX512VPOPCNTDQ() bool {
-	return c.Features&AVX512VPOPCNTDQ != 0
-}
-
-// GFNI indicates support of Galois Field New Instructions
-func (c CPUInfo) GFNI() bool {
-	return c.Features&GFNI != 0
-}
-
-// VAES indicates support of Vector AES
-func (c CPUInfo) VAES() bool {
-	return c.Features&VAES != 0
-}
-
-// AVX512BITALG indicates support of AVX-512 Bit Algorithms
-func (c CPUInfo) AVX512BITALG() bool {
-	return c.Features&AVX512BITALG != 0
-}
-
-// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
-func (c CPUInfo) VPCLMULQDQ() bool {
-	return c.Features&VPCLMULQDQ != 0
-}
-
-// AVX512BF16 indicates support of
-func (c CPUInfo) AVX512BF16() bool {
-	return c.Features&AVX512BF16 != 0
-}
-
-// AVX512VP2INTERSECT indicates support of
-func (c CPUInfo) AVX512VP2INTERSECT() bool {
-	return c.Features&AVX512VP2INTERSECT != 0
+	safe := true
+	if detectArmFlag != nil {
+		safe = !*detectArmFlag
+	}
+	addInfo(&CPU, safe)
+	if displayFeats != nil && *displayFeats {
+		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
+		// Exit with non-zero so tests will print value.
+		os.Exit(1)
+	}
+	if disableFlag != nil {
+		s := strings.Split(*disableFlag, ",")
+		for _, feat := range s {
+			feat := ParseFeature(strings.TrimSpace(feat))
+			if feat != UNKNOWN {
+				CPU.featureSet.unset(feat)
+			}
+		}
+	}
 }
 
-// MPX indicates support of Intel MPX (Memory Protection Extensions)
-func (c CPUInfo) MPX() bool {
-	return c.Features&MPX != 0
+// DetectARM will detect ARM64 features.
+// This is NOT done automatically since it can potentially crash
+// if the OS does not handle the command.
+// If in the future this can be done safely this function may not
+// do anything.
+func DetectARM() {
+	addInfo(&CPU, false)
 }
 
-// ERMS indicates support of Enhanced REP MOVSB/STOSB
-func (c CPUInfo) ERMS() bool {
-	return c.Features&ERMS != 0
-}
+var detectArmFlag *bool
+var displayFeats *bool
+var disableFlag *string
 
-// RDTSCP Instruction is available.
-func (c CPUInfo) RDTSCP() bool {
-	return c.Features&RDTSCP != 0
+// Flags will enable flags.
+// This must be called *before* flag.Parse AND
+// Detect must be called after the flags have been parsed.
+// Note that this means that any detection used in init() functions
+// will not contain these flags.
+func Flags() {
+	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
+	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
+	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
 }
 
-// CX16 indicates if CMPXCHG16B instruction is available.
-func (c CPUInfo) CX16() bool {
-	return c.Features&CX16 != 0
-}
-
-// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
-// So TSX simply checks that.
-func (c CPUInfo) TSX() bool {
-	return c.Features&(HLE|RTM) == HLE|RTM
+// Supports returns whether the CPU supports all of the requested features.
+func (c CPUInfo) Supports(ids ...FeatureID) bool {
+	for _, id := range ids {
+		if !c.featureSet.inSet(id) {
+			return false
+		}
+	}
+	return true
 }
 
-// Atom indicates an Atom processor
-func (c CPUInfo) Atom() bool {
-	return c.Features&ATOM != 0
+// Has allows for checking a single feature.
+// Should be inlined by the compiler.
+func (c CPUInfo) Has(id FeatureID) bool {
+	return c.featureSet.inSet(id)
 }
 
-// Intel returns true if vendor is recognized as Intel
-func (c CPUInfo) Intel() bool {
-	return c.VendorID == Intel
-}
+// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2)
+var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
+var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
+var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
 
-// AMD returns true if vendor is recognized as AMD
-func (c CPUInfo) AMD() bool {
-	return c.VendorID == AMD
+// X64Level returns the microarchitecture level detected on the CPU.
+// If features are lacking or non x64 mode, 0 is returned.
+// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+func (c CPUInfo) X64Level() int {
+	if c.featureSet.hasSet(level4Features) {
+		return 4
+	}
+	if c.featureSet.hasSet(level3Features) {
+		return 3
+	}
+	if c.featureSet.hasSet(level2Features) {
+		return 2
+	}
+	if c.featureSet.hasSet(level1Features) {
+		return 1
+	}
+	return 0
 }
 
-// Hygon returns true if vendor is recognized as Hygon
-func (c CPUInfo) Hygon() bool {
-	return c.VendorID == Hygon
+// Disable will disable one or several features.
+func (c *CPUInfo) Disable(ids ...FeatureID) bool {
+	for _, id := range ids {
+		c.featureSet.unset(id)
+	}
+	return true
 }
 
-// Transmeta returns true if vendor is recognized as Transmeta
-func (c CPUInfo) Transmeta() bool {
-	return c.VendorID == Transmeta
+// Enable will enable one or several features even if they were undetected.
+// This is of course not recommended for obvious reasons. Always returns true.
+func (c *CPUInfo) Enable(ids ...FeatureID) bool {
+	for _, id := range ids {
+		c.featureSet.set(id)
+	}
+	return true
+}
 
-// NSC returns true if vendor is recognized as National Semiconductor
-func (c CPUInfo) NSC() bool {
-	return c.VendorID == NSC
+// IsVendor returns true if the detected vendor ID equals v.
+func (c CPUInfo) IsVendor(v Vendor) bool {
+	return c.VendorID == v
+}
 
-// VIA returns true if vendor is recognized as VIA
-func (c CPUInfo) VIA() bool {
-	return c.VendorID == VIA
+func (c CPUInfo) FeatureSet() []string {
+	s := make([]string, 0)
+	s = append(s, c.featureSet.Strings()...)
+	return s
 }
 
 // RTCounter returns the 64-bit time-stamp counter
 // Uses the RDTSCP instruction. The value 0 is returned
 // if the CPU does not support the instruction.
 func (c CPUInfo) RTCounter() uint64 {
-	if !c.RDTSCP() {
+	if !c.Supports(RDTSCP) {
 		return 0
 	}
 	a, _, _, d := rdtscpAsm()
@@ -643,7 +391,7 @@ func (c CPUInfo) RTCounter() uint64 {
 // about the current cpu/core the code is running on.
 // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
 func (c CPUInfo) Ia32TscAux() uint32 {
-	if !c.RDTSCP() {
+	if !c.Supports(RDTSCP) {
 		return 0
 	}
 	_, _, ecx, _ := rdtscpAsm()
@@ -662,25 +410,42 @@ func (c CPUInfo) LogicalCPU() int {
 	return int(ebx >> 24)
 }
 
-// hertz tries to compute the clock speed of the CPU. If leaf 15 is
+// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
 // supported, use it, otherwise parse the brand string. Yes, really.
-func hertz(model string) int64 {
+func (c *CPUInfo) frequencies() {
+	c.Hz, c.BoostFreq = 0, 0
 	mfi := maxFunctionID()
 	if mfi >= 0x15 {
 		eax, ebx, ecx, _ := cpuid(0x15)
 		if eax != 0 && ebx != 0 && ecx != 0 {
-			return int64((int64(ecx) * int64(ebx)) / int64(eax))
+			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
 		}
 	}
+	if mfi >= 0x16 {
+		a, b, _, _ := cpuid(0x16)
+		// Base...
+		if a&0xffff > 0 {
+			c.Hz = int64(a&0xffff) * 1_000_000
+		}
+		// Boost...
+		if b&0xffff > 0 {
+			c.BoostFreq = int64(b&0xffff) * 1_000_000
+		}
+	}
+	if c.Hz > 0 {
+		return
+	}
+
 	// computeHz determines the official rated speed of a CPU from its brand
 	// string. This insanity is *actually the official documented way to do
 	// this according to Intel*, prior to leaf 0x15 existing. The official
 	// documentation only shows this working for exactly `x.xx` or `xxxx`
 	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
 	// sizes.
+	model := c.BrandName
 	hz := strings.LastIndex(model, "Hz")
 	if hz < 3 {
-		return -1
+		return
 	}
 	var multiplier int64
 	switch model[hz-1] {
@@ -692,7 +457,7 @@ func hertz(model string) int64 {
 		multiplier = 1000 * 1000 * 1000 * 1000
 	}
 	if multiplier == 0 {
-		return -1
+		return
 	}
 	freq := int64(0)
 	divisor := int64(0)
@@ -704,77 +469,114 @@ func hertz(model string) int64 {
 			decimalShift *= 10
 		} else if model[i] == '.' {
 			if divisor != 0 {
-				return -1
+				return
 			}
 			divisor = decimalShift
 		} else {
-			return -1
+			return
 		}
 	}
 	// we didn't find a space
 	if i < 0 {
-		return -1
+		return
 	}
 	if divisor != 0 {
-		return (freq * multiplier) / divisor
+		c.Hz = (freq * multiplier) / divisor
+		return
 	}
-	return freq * multiplier
+	c.Hz = freq * multiplier
 }
 
 // VM Will return true if the cpu id indicates we are in
-// a virtual machine. This is only a hint, and will very likely
-// have many false negatives.
+// a virtual machine.
 func (c CPUInfo) VM() bool {
-	switch c.VendorID {
-	case MSVM, KVM, VMware, XenHVM, Bhyve:
-		return true
-	}
-	return false
+	return c.featureSet.inSet(HYPERVISOR)
 }
 
-// Flags contains detected cpu features and characteristics
-type Flags uint64
+// flags contains detected cpu features and characteristics
+type flags uint64
+
+// log2(bits_in_uint64)
+const flagBitsLog2 = 6
+const flagBits = 1 << flagBitsLog2
+const flagMask = flagBits - 1
 
-// ArmFlags contains detected ARM cpu features and characteristics
-type ArmFlags uint64
+// flagSet contains detected cpu features and characteristics in an array of flags
+type flagSet [(lastID + flagMask) / flagBits]flags
 
-// String returns a string representation of the detected
-// CPU features.
-func (f Flags) String() string {
-	return strings.Join(f.Strings(), ",")
+func (s flagSet) inSet(feat FeatureID) bool {
+	return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
 }
 
-// Strings returns an array of the detected features.
-func (f Flags) Strings() []string {
-	r := make([]string, 0, 20)
-	for i := uint(0); i < 64; i++ {
-		key := Flags(1 << i)
-		val := flagNames[key]
-		if f&key != 0 {
-			r = append(r, val)
+func (s *flagSet) set(feat FeatureID) {
+	s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
+}
+
+// setIf will set a feature if boolean is true.
+func (s *flagSet) setIf(cond bool, features ...FeatureID) {
+	if cond {
+		for _, offset := range features {
+			s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
 		}
 	}
-	return r
 }
 
-// String returns a string representation of the detected
-// CPU features.
-func (f ArmFlags) String() string {
-	return strings.Join(f.Strings(), ",")
+func (s *flagSet) unset(offset FeatureID) {
+	bit := flags(1 << (offset & flagMask))
+	s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
 }
 
-// Strings returns an array of the detected features.
-func (f ArmFlags) Strings() []string {
-	r := make([]string, 0, 20)
-	for i := uint(0); i < 64; i++ {
-		key := ArmFlags(1 << i)
-		val := flagNamesArm[key]
-		if f&key != 0 {
-			r = append(r, val)
+// or with another flagset.
+func (s *flagSet) or(other flagSet) {
+	for i, v := range other[:] {
+		s[i] |= v
+	}
+}
+
+// hasSet returns whether all features are present.
+func (s flagSet) hasSet(other flagSet) bool {
+	for i, v := range other[:] {
+		if s[i]&v != v {
+			return false
+		}
+	}
+	return true
+}
+
+func flagSetWith(feat ...FeatureID) flagSet {
+	var res flagSet
+	for _, f := range feat {
+		res.set(f)
+	}
+	return res
+}
+
+// ParseFeature will parse the string and return the ID of the matching feature.
+// Will return UNKNOWN if not found.
+func ParseFeature(s string) FeatureID {
+	s = strings.ToUpper(s)
+	for i := firstID; i < lastID; i++ {
+		if i.String() == s {
+			return i
+		}
+	}
+	return UNKNOWN
+}
+
+// Strings returns an array of the detected features for FlagsSet.
+func (s flagSet) Strings() []string {
+	if len(s) == 0 {
+		return []string{""}
+	}
+	r := make([]string, 0)
+	for i := firstID; i < lastID; i++ {
+		if s.inSet(i) {
+			r = append(r, i.String())
 		}
 	}
 	return r
 }
+
 func maxExtendedFunction() uint32 {
 	eax, _, _, _ := cpuid(0x80000000)
 	return eax
@@ -826,6 +628,15 @@ func threadsPerCore() int {
 	}
 	_, b, _, _ := cpuidex(0xb, 0)
 	if b&0xffff == 0 {
+		if vend == AMD {
+			// Workaround for AMD returning 0, assume 2 if HTT is reported on family 17h (Zen) or newer.
+			// It will be more correct than not.
+			fam, _ := familyModel()
+			_, _, _, d := cpuid(1)
+			if (d&(1<<28)) != 0 && fam >= 23 {
+				return 2
+			}
+		}
 		return 1
 	}
 	return int(b & 0xffff)
@@ -879,11 +690,13 @@ func physicalCores() int {
 		if lc > 0 && tpc > 0 {
 			return lc / tpc
 		}
-		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
 
+		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
 		if maxExtendedFunction() >= 0x80000008 {
 			_, _, c, _ := cpuid(0x80000008)
-			return int(c&0xff) + 1
+			if c&0xff > 0 {
+				return int(c&0xff) + 1
+			}
 		}
 	}
 	return 0
@@ -916,7 +729,7 @@ func vendorID() (Vendor, string) {
 	v := string(valAsString(b, d, c))
 	vend, ok := vendorMapping[v]
 	if !ok {
-		return Other, v
+		return VendorUnknown, v
 	}
 	return vend, v
 }
@@ -947,6 +760,7 @@ func (c *CPUInfo) cacheSize() {
 		if maxFunctionID() < 4 {
 			return
 		}
+		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
 		for i := uint32(0); ; i++ {
 			eax, ebx, ecx, _ := cpuidex(4, i)
 			cacheType := eax & 15
@@ -1039,8 +853,6 @@ func (c *CPUInfo) cacheSize() {
 			}
 		}
 	}
-
-	return
 }
 
 type SGXEPCSection struct {
@@ -1094,130 +906,108 @@ func hasSGX(available, lc bool) (rval SGXSupport) {
 	return
 }
 
-func support() Flags {
+func support() flagSet {
+	var fs flagSet
 	mfi := maxFunctionID()
 	vend, _ := vendorID()
 	if mfi < 0x1 {
-		return 0
+		return fs
 	}
-	rval := uint64(0)
+	family, model := familyModel()
+
 	_, _, c, d := cpuid(1)
-	if (d & (1 << 15)) != 0 {
-		rval |= CMOV
-	}
-	if (d & (1 << 23)) != 0 {
-		rval |= MMX
-	}
-	if (d & (1 << 25)) != 0 {
-		rval |= MMXEXT
-	}
-	if (d & (1 << 25)) != 0 {
-		rval |= SSE
-	}
-	if (d & (1 << 26)) != 0 {
-		rval |= SSE2
-	}
-	if (c & 1) != 0 {
-		rval |= SSE3
-	}
-	if (c & (1 << 5)) != 0 {
-		rval |= VMX
-	}
-	if (c & 0x00000200) != 0 {
-		rval |= SSSE3
-	}
-	if (c & 0x00080000) != 0 {
-		rval |= SSE4
-	}
-	if (c & 0x00100000) != 0 {
-		rval |= SSE42
-	}
-	if (c & (1 << 25)) != 0 {
-		rval |= AESNI
-	}
-	if (c & (1 << 1)) != 0 {
-		rval |= CLMUL
-	}
-	if c&(1<<23) != 0 {
-		rval |= POPCNT
-	}
-	if c&(1<<30) != 0 {
-		rval |= RDRAND
-	}
-	if c&(1<<29) != 0 {
-		rval |= F16C
-	}
-	if c&(1<<13) != 0 {
-		rval |= CX16
-	}
+	fs.setIf((d&(1<<0)) != 0, X87)
+	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
+	fs.setIf((d&(1<<11)) != 0, SCE)
+	fs.setIf((d&(1<<15)) != 0, CMOV)
+	fs.setIf((d&(1<<22)) != 0, MMXEXT)
+	fs.setIf((d&(1<<23)) != 0, MMX)
+	fs.setIf((d&(1<<24)) != 0, FXSR)
+	fs.setIf((d&(1<<25)) != 0, FXSROPT)
+	fs.setIf((d&(1<<25)) != 0, SSE)
+	fs.setIf((d&(1<<26)) != 0, SSE2)
+	fs.setIf((c&1) != 0, SSE3)
+	fs.setIf((c&(1<<5)) != 0, VMX)
+	fs.setIf((c&0x00000200) != 0, SSSE3)
+	fs.setIf((c&0x00080000) != 0, SSE4)
+	fs.setIf((c&0x00100000) != 0, SSE42)
+	fs.setIf((c&(1<<25)) != 0, AESNI)
+	fs.setIf((c&(1<<1)) != 0, CLMUL)
+	fs.setIf(c&(1<<22) != 0, MOVBE)
+	fs.setIf(c&(1<<23) != 0, POPCNT)
+	fs.setIf(c&(1<<30) != 0, RDRAND)
+
+	// This bit has been reserved by Intel & AMD for use by hypervisors,
+	// and indicates the presence of a hypervisor.
+	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
+	fs.setIf(c&(1<<29) != 0, F16C)
+	fs.setIf(c&(1<<13) != 0, CX16)
+
 	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
-		if threadsPerCore() > 1 {
-			rval |= HTT
-		}
+		fs.setIf(threadsPerCore() > 1, HTT)
 	}
 	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
-		if threadsPerCore() > 1 {
-			rval |= HTT
-		}
+		fs.setIf(threadsPerCore() > 1, HTT)
 	}
-	// Check XGETBV, OXSAVE and AVX bits
-	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
+	fs.setIf(c&(1<<26) != 0, XSAVE)
+	fs.setIf(c&(1<<27) != 0, OSXSAVE)
+	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
+	const avxCheck = 1<<26 | 1<<27 | 1<<28
+	if c&avxCheck == avxCheck {
 		// Check for OS support
 		eax, _ := xgetbv(0)
 		if (eax & 0x6) == 0x6 {
-			rval |= AVX
-			if (c & 0x00001000) != 0 {
-				rval |= FMA3
+			fs.set(AVX)
+			switch vend {
+			case Intel:
+				// Older than Haswell.
+				fs.setIf(family == 6 && model < 60, AVXSLOW)
+			case AMD:
+				// Older than Zen 2
+				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
 			}
 		}
 	}
+	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
+	// fma3 and OSXSAVE needed.
+	const fma3Check = 1<<12 | 1<<27
+	fs.setIf(c&fma3Check == fma3Check, FMA3)
 
 	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
 	if mfi >= 7 {
 		_, ebx, ecx, edx := cpuidex(7, 0)
 		eax1, _, _, _ := cpuidex(7, 1)
-		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
-			rval |= AVX2
+		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
+			fs.set(AVX2)
 		}
+		// CPUID.(EAX=7, ECX=0).EBX
 		if (ebx & 0x00000008) != 0 {
-			rval |= BMI1
-			if (ebx & 0x00000100) != 0 {
-				rval |= BMI2
-			}
-		}
-		if ebx&(1<<2) != 0 {
-			rval |= SGX
-		}
-		if ebx&(1<<4) != 0 {
-			rval |= HLE
-		}
-		if ebx&(1<<9) != 0 {
-			rval |= ERMS
-		}
-		if ebx&(1<<11) != 0 {
-			rval |= RTM
-		}
-		if ebx&(1<<14) != 0 {
-			rval |= MPX
-		}
-		if ebx&(1<<18) != 0 {
-			rval |= RDSEED
-		}
-		if ebx&(1<<19) != 0 {
-			rval |= ADX
-		}
-		if ebx&(1<<29) != 0 {
-			rval |= SHA
-		}
-		if edx&(1<<26) != 0 {
-			rval |= IBPB
-		}
-		if ecx&(1<<30) != 0 {
-			rval |= SGXLC
-		}
-		if edx&(1<<27) != 0 {
-			rval |= STIBP
+			fs.set(BMI1)
+			fs.setIf((ebx&0x00000100) != 0, BMI2)
 		}
+		fs.setIf(ebx&(1<<2) != 0, SGX)
+		fs.setIf(ebx&(1<<4) != 0, HLE)
+		fs.setIf(ebx&(1<<9) != 0, ERMS)
+		fs.setIf(ebx&(1<<11) != 0, RTM)
+		fs.setIf(ebx&(1<<14) != 0, MPX)
+		fs.setIf(ebx&(1<<18) != 0, RDSEED)
+		fs.setIf(ebx&(1<<19) != 0, ADX)
+		fs.setIf(ebx&(1<<29) != 0, SHA)
+		// CPUID.(EAX=7, ECX=0).ECX
+		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
+		fs.setIf(ecx&(1<<7) != 0, CETSS)
+		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
+		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
+		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
+		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
+		fs.setIf(ecx&(1<<30) != 0, SGXLC)
+		// CPUID.(EAX=7, ECX=0).EDX
+		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
+		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
+		fs.setIf(edx&(1<<20) != 0, CETIBT)
+		fs.setIf(edx&(1<<26) != 0, IBPB)
+		fs.setIf(edx&(1<<27) != 0, STIBP)
 
 		// Only detect AVX-512 features if XGETBV is supported
 		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
@@ -1227,64 +1017,36 @@ func support() Flags {
 			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
 			// ZMM16-ZMM31 state are enabled by OS)
 			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
-			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
-				if ebx&(1<<16) != 0 {
-					rval |= AVX512F
-				}
-				if ebx&(1<<17) != 0 {
-					rval |= AVX512DQ
-				}
-				if ebx&(1<<21) != 0 {
-					rval |= AVX512IFMA
-				}
-				if ebx&(1<<26) != 0 {
-					rval |= AVX512PF
-				}
-				if ebx&(1<<27) != 0 {
-					rval |= AVX512ER
-				}
-				if ebx&(1<<28) != 0 {
-					rval |= AVX512CD
-				}
-				if ebx&(1<<30) != 0 {
-					rval |= AVX512BW
-				}
-				if ebx&(1<<31) != 0 {
-					rval |= AVX512VL
-				}
+			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
+			if runtime.GOOS == "darwin" {
+				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
+			}
+			if hasAVX512 {
+				fs.setIf(ebx&(1<<16) != 0, AVX512F)
+				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
+				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
+				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
+				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
+				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
+				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
+				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
 				// ecx
-				if ecx&(1<<1) != 0 {
-					rval |= AVX512VBMI
-				}
-				if ecx&(1<<6) != 0 {
-					rval |= AVX512VBMI2
-				}
-				if ecx&(1<<8) != 0 {
-					rval |= GFNI
-				}
-				if ecx&(1<<9) != 0 {
-					rval |= VAES
-				}
-				if ecx&(1<<10) != 0 {
-					rval |= VPCLMULQDQ
-				}
-				if ecx&(1<<11) != 0 {
-					rval |= AVX512VNNI
-				}
-				if ecx&(1<<12) != 0 {
-					rval |= AVX512BITALG
-				}
-				if ecx&(1<<14) != 0 {
-					rval |= AVX512VPOPCNTDQ
-				}
+				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
+				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
+				fs.setIf(ecx&(1<<8) != 0, GFNI)
+				fs.setIf(ecx&(1<<9) != 0, VAES)
+				fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
+				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
+				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
+				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
 				// edx
-				if edx&(1<<8) != 0 {
-					rval |= AVX512VP2INTERSECT
-				}
-				// cpuid eax 07h,ecx=1
-				if eax1&(1<<5) != 0 {
-					rval |= AVX512BF16
-				}
+				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
+				fs.setIf(edx&(1<<22) != 0, AMXBF16)
+				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
+				fs.setIf(edx&(1<<24) != 0, AMXTILE)
+				fs.setIf(edx&(1<<25) != 0, AMXINT8)
+				// eax1 = CPUID.(EAX=7, ECX=1).EAX
+				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
 			}
 		}
 	}
@@ -1292,78 +1054,59 @@ func support() Flags {
 	if maxExtendedFunction() >= 0x80000001 {
 		_, _, c, d := cpuid(0x80000001)
 		if (c & (1 << 5)) != 0 {
-			rval |= LZCNT
-			rval |= POPCNT
-		}
-		if (d & (1 << 31)) != 0 {
-			rval |= AMD3DNOW
-		}
-		if (d & (1 << 30)) != 0 {
-			rval |= AMD3DNOWEXT
-		}
-		if (d & (1 << 23)) != 0 {
-			rval |= MMX
-		}
-		if (d & (1 << 22)) != 0 {
-			rval |= MMXEXT
-		}
-		if (c & (1 << 6)) != 0 {
-			rval |= SSE4A
-		}
-		if d&(1<<20) != 0 {
-			rval |= NX
-		}
-		if d&(1<<27) != 0 {
-			rval |= RDTSCP
-		}
-
-		/* Allow for selectively disabling SSE2 functions on AMD processors
-		   with SSE2 support but not SSE4a. This includes Athlon64, some
-		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
-		   than SSE2 often enough to utilize this special-case flag.
-		   AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
-		   so that SSE2 is used unless explicitly disabled by checking
-		   AV_CPU_FLAG_SSE2SLOW. */
-		if vend != Intel &&
-			rval&SSE2 != 0 && (c&0x00000040) == 0 {
-			rval |= SSE2SLOW
+			fs.set(LZCNT)
+			fs.set(POPCNT)
 		}
+		fs.setIf((c&(1<<0)) != 0, LAHF)
+		fs.setIf((c&(1<<10)) != 0, IBS)
+		fs.setIf((d&(1<<31)) != 0, AMD3DNOW)
+		fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT)
+		fs.setIf((d&(1<<23)) != 0, MMX)
+		fs.setIf((d&(1<<22)) != 0, MMXEXT)
+		fs.setIf((c&(1<<6)) != 0, SSE4A)
+		fs.setIf(d&(1<<20) != 0, NX)
+		fs.setIf(d&(1<<27) != 0, RDTSCP)
 
 		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
 		 * used unless the OS has AVX support. */
-		if (rval & AVX) != 0 {
-			if (c & 0x00000800) != 0 {
-				rval |= XOP
-			}
-			if (c & 0x00010000) != 0 {
-				rval |= FMA4
-			}
+		if fs.inSet(AVX) {
+			fs.setIf((c&0x00000800) != 0, XOP)
+			fs.setIf((c&0x00010000) != 0, FMA4)
 		}
 
-		if vend == Intel {
-			family, model := familyModel()
-			if family == 6 && (model == 9 || model == 13 || model == 14) {
-				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
-				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
-				 * usually slower than mmx. */
-				if (rval & SSE2) != 0 {
-					rval |= SSE2SLOW
-				}
-				if (rval & SSE3) != 0 {
-					rval |= SSE3SLOW
-				}
-			}
-			/* The Atom processor has SSSE3 support, which is useful in many cases,
-			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
-			 * on the Atom, but is generally faster on other processors supporting
-			 * SSSE3. This flag allows for selectively disabling certain SSSE3
-			 * functions on the Atom. */
-			if family == 6 && model == 28 {
-				rval |= ATOM
-			}
-		}
 	}
-	return Flags(rval)
+	if maxExtendedFunction() >= 0x80000007 {
+		_, b, _, d := cpuid(0x80000007)
+		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
+		fs.setIf((b&(1<<1)) != 0, SUCCOR)
+		fs.setIf((b&(1<<2)) != 0, HWA)
+		fs.setIf((d&(1<<9)) != 0, CPBOOST)
+	}
+
+	if maxExtendedFunction() >= 0x80000008 {
+		_, b, _, _ := cpuid(0x80000008)
+		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
+		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
+		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+		fs.setIf((b&(1<<4)) != 0, RDPRU)
+		fs.setIf((b&(1<<3)) != 0, INVLPGB)
+		fs.setIf((b&(1<<1)) != 0, MSRIRC)
+		fs.setIf((b&(1<<0)) != 0, CLZERO)
+	}
+
+	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
+		eax, _, _, _ := cpuid(0x8000001b)
+		fs.setIf((eax>>0)&1 == 1, IBSFFV)
+		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
+		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
+		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
+		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
+		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
+		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
+		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+	}
+
+	return fs
 }
 
 func valAsString(values ...uint32) []byte {
@@ -1387,118 +1130,3 @@ func valAsString(values ...uint32) []byte {
 	}
 	return r
 }
-
-// Single-precision and double-precision floating point
-func (c CPUInfo) ArmFP() bool {
-	return c.Arm&FP != 0
-}
-
-// Advanced SIMD
-func (c CPUInfo) ArmASIMD() bool {
-	return c.Arm&ASIMD != 0
-}
-
-// Generic timer
-func (c CPUInfo) ArmEVTSTRM() bool {
-	return c.Arm&EVTSTRM != 0
-}
-
-// AES instructions
-func (c CPUInfo) ArmAES() bool {
-	return c.Arm&AES != 0
-}
-
-// Polynomial Multiply instructions (PMULL/PMULL2)
-func (c CPUInfo) ArmPMULL() bool {
-	return c.Arm&PMULL != 0
-}
-
-// SHA-1 instructions (SHA1C, etc)
-func (c CPUInfo) ArmSHA1() bool {
-	return c.Arm&SHA1 != 0
-}
-
-// SHA-2 instructions (SHA256H, etc)
-func (c CPUInfo) ArmSHA2() bool {
-	return c.Arm&SHA2 != 0
-}
-
-// CRC32/CRC32C instructions
-func (c CPUInfo) ArmCRC32() bool {
-	return c.Arm&CRC32 != 0
-}
-
-// Large System Extensions (LSE)
-func (c CPUInfo) ArmATOMICS() bool {
-	return c.Arm&ATOMICS != 0
-}
-
-// Half-precision floating point
-func (c CPUInfo) ArmFPHP() bool {
-	return c.Arm&FPHP != 0
-}
-
-// Advanced SIMD half-precision floating point
-func (c CPUInfo) ArmASIMDHP() bool {
-	return c.Arm&ASIMDHP != 0
-}
-
-// Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
-func (c CPUInfo) ArmASIMDRDM() bool {
-	return c.Arm&ASIMDRDM != 0
-}
-
-// Javascript-style double->int convert (FJCVTZS)
-func (c CPUInfo) ArmJSCVT() bool {
-	return c.Arm&JSCVT != 0
-}
-
-// Floatin point complex number addition and multiplication
-func (c CPUInfo) ArmFCMA() bool {
-	return c.Arm&FCMA != 0
-}
-
-// Weaker release consistency (LDAPR, etc)
-func (c CPUInfo) ArmLRCPC() bool {
-	return c.Arm&LRCPC != 0
-}
-
-// Data cache clean to Point of Persistence (DC CVAP)
-func (c CPUInfo) ArmDCPOP() bool {
-	return c.Arm&DCPOP != 0
-}
-
-// SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
-func (c CPUInfo) ArmSHA3() bool {
-	return c.Arm&SHA3 != 0
-}
-
-// SM3 instructions
-func (c CPUInfo) ArmSM3() bool {
-	return c.Arm&SM3 != 0
-}
-
-// SM4 instructions
-func (c CPUInfo) ArmSM4() bool {
-	return c.Arm&SM4 != 0
-}
-
-// SIMD Dot Product
-func (c CPUInfo) ArmASIMDDP() bool {
-	return c.Arm&ASIMDDP != 0
-}
-
-// SHA512 instructions
-func (c CPUInfo) ArmSHA512() bool {
-	return c.Arm&SHA512 != 0
-}
-
-// Scalable Vector Extension
-func (c CPUInfo) ArmSVE() bool {
-	return c.Arm&SVE != 0
-}
-
-// Generic Pointer Authentication
-func (c CPUInfo) ArmGPA() bool {
-	return c.Arm&GPA != 0
-}
diff --git a/cpuid_386.s b/cpuid_386.s
index 089638f..8587c3a 100644
--- a/cpuid_386.s
+++ b/cpuid_386.s
@@ -40,3 +40,8 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
 	MOVL CX, ecx+8(FP)
 	MOVL DX, edx+12(FP)
 	RET
+
+// func asmDarwinHasAVX512() bool
+TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
+	MOVB $0, ret+0(FP) // 386 stub: always false; write exactly the 1-byte bool result
+	RET
diff --git a/cpuid_amd64.s b/cpuid_amd64.s
index 3ba0559..bc11f89 100644
--- a/cpuid_amd64.s
+++ b/cpuid_amd64.s
@@ -40,3 +40,33 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
 	MOVL CX, ecx+8(FP)
 	MOVL DX, edx+12(FP)
 	RET
+
+// From https://go-review.googlesource.com/c/sys/+/285572/
+// func asmDarwinHasAVX512() bool
+TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
+	MOVB $0, ret+0(FP) // default to false
+
+#ifdef GOOS_darwin // return if not darwin
+#ifdef GOARCH_amd64 // return if not amd64
+// These values from:
+// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
+#define commpage64_base_address         0x00007fffffe00000
+#define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
+#define commpage64_version              (commpage64_base_address+0x01E)
+#define hasAVX512F                      0x0000004000000000
+	MOVQ $commpage64_version, BX // BX = fixed address of the commpage version field
+	MOVW (BX), AX // AX = 16-bit commpage version
+	CMPW AX, $13                            // versions < 13 do not support AVX512
+	JL   no_avx512 // too old: keep the false default written above
+	MOVQ $commpage64_cpu_capabilities64, BX // BX = fixed address of the 64-bit capability word
+	MOVQ (BX), AX // AX = capability bits
+	MOVQ $hasAVX512F, CX // 64-bit mask goes through a register before the AND
+	ANDQ CX, AX // isolate the hasAVX512F bit; sets ZF when it is clear
+	JZ   no_avx512 // bit clear: keep the false default
+	MOVB $1, ret+0(FP) // bit set: return true
+
+no_avx512:
+#endif
+#endif
+	RET
+
diff --git a/cpuid_arm64.s b/cpuid_arm64.s
index 8975ee8..b31d6ae 100644
--- a/cpuid_arm64.s
+++ b/cpuid_arm64.s
@@ -1,6 +1,6 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build arm64,!gccgo
+//+build arm64,!gccgo,!noasm,!appengine
 
 // See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
 
diff --git a/cpuid_test.go b/cpuid_test.go
index a2f39f5..5b810b6 100644
--- a/cpuid_test.go
+++ b/cpuid_test.go
@@ -4,12 +4,26 @@ package cpuid
 
 import (
 	"fmt"
+	"strings"
 	"testing"
 )
 
+func TestLastID(t *testing.T) { // guards against a stale generated String() for the lastID value
+	if name := lastID.String(); name != "lastID" {
+		t.Fatal("stringer not updated, run go generate")
+	}
+}
+
+func TestLastVendorID(t *testing.T) { // guards against a stale generated String() for the lastVendor value
+	if name := lastVendor.String(); name != "lastVendor" {
+		t.Fatal("stringer not updated, run go generate")
+	}
+}
+
 // There is no real way to test a CPU identifier, since results will
 // obviously differ on each machine.
 func TestCPUID(t *testing.T) {
+	Detect()
 	n := maxFunctionID()
 	t.Logf("Max Function:0x%x", n)
 	n = maxExtendedFunction()
@@ -21,19 +35,38 @@ func TestCPUID(t *testing.T) {
 	t.Log("ThreadsPerCore:", CPU.ThreadsPerCore)
 	t.Log("LogicalCores:", CPU.LogicalCores)
 	t.Log("Family", CPU.Family, "Model:", CPU.Model)
-	t.Log("Features:", CPU.Features)
+	t.Log("Features:", strings.Join(CPU.FeatureSet(), ","))
 	t.Log("Cacheline bytes:", CPU.CacheLine)
 	t.Log("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
 	t.Log("L1 Data Cache:", CPU.Cache.L1D, "bytes")
 	t.Log("L2 Cache:", CPU.Cache.L2, "bytes")
 	t.Log("L3 Cache:", CPU.Cache.L3, "bytes")
 	t.Log("Hz:", CPU.Hz, "Hz")
+	t.Log("VM:", CPU.VM())
+	t.Log("BoostFreq:", CPU.BoostFreq, "Hz")
+}
+
+func TestExample(t *testing.T) { // smoke test: prints a summary of the detected CPU to stdout
+	Detect()
+	// Print basic CPU information:
+	fmt.Println("Name:", CPU.BrandName)
+	fmt.Println("PhysicalCores:", CPU.PhysicalCores)
+	fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
+	fmt.Println("LogicalCores:", CPU.LogicalCores)
+	fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
+	fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
+	fmt.Println("Cacheline bytes:", CPU.CacheLine)
+	fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
+	fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
+	fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
+	fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
+	fmt.Println("Frequency", CPU.Hz, "hz")
 
-	if CPU.SSE2() {
-		t.Log("We have SSE2")
+	// Supports is given both SSE and SSE2 in one call:
+	if CPU.Supports(SSE, SSE2) {
+		fmt.Println("We have Streaming SIMD 2 Extensions")
 	}
 }
-
 func TestDumpCPUID(t *testing.T) {
 	n := int(maxFunctionID())
 	for i := 0; i <= n; i++ {
@@ -64,13 +97,8 @@ func Example() {
 	fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
 	fmt.Println("LogicalCores:", CPU.LogicalCores)
 	fmt.Println("Family", CPU.Family, "Model:", CPU.Model)
-	fmt.Println("Features:", CPU.Features)
+	fmt.Println("Features:", CPU.FeatureSet())
 	fmt.Println("Cacheline bytes:", CPU.CacheLine)
-
-	// Test if we have a specific feature:
-	if CPU.SSE() {
-		fmt.Println("We have Streaming SIMD Extensions")
-	}
 }
 
 func TestBrandNameZero(t *testing.T) {
@@ -85,202 +113,10 @@ func TestBrandNameZero(t *testing.T) {
 	}
 }
 
-// Generated here: http://play.golang.org/p/mko-0tFt0Q
-
-// TestCmov tests Cmov() function
-func TestCmov(t *testing.T) {
-	got := CPU.Cmov()
-	expected := CPU.Features&CMOV == CMOV
-	if got != expected {
-		t.Fatalf("Cmov: expected %v, got %v", expected, got)
-	}
-	t.Log("CMOV Support:", got)
-}
-
-// TestAmd3dnow tests Amd3dnow() function
-func TestAmd3dnow(t *testing.T) {
-	got := CPU.Amd3dnow()
-	expected := CPU.Features&AMD3DNOW == AMD3DNOW
-	if got != expected {
-		t.Fatalf("Amd3dnow: expected %v, got %v", expected, got)
-	}
-	t.Log("AMD3DNOW Support:", got)
-}
-
-// TestAmd3dnowExt tests Amd3dnowExt() function
-func TestAmd3dnowExt(t *testing.T) {
-	got := CPU.Amd3dnowExt()
-	expected := CPU.Features&AMD3DNOWEXT == AMD3DNOWEXT
-	if got != expected {
-		t.Fatalf("Amd3dnowExt: expected %v, got %v", expected, got)
-	}
-	t.Log("AMD3DNOWEXT Support:", got)
-}
-
-// TestVMX tests VMX() function
-func TestVMX(t *testing.T) {
-	got := CPU.VMX()
-	expected := CPU.Features&VMX == VMX
-	if got != expected {
-		t.Fatalf("VMX: expected %v, got %v", expected, got)
-	}
-	t.Log("VMX Support:", got)
-}
-
-// TestMMX tests MMX() function
-func TestMMX(t *testing.T) {
-	got := CPU.MMX()
-	expected := CPU.Features&MMX == MMX
-	if got != expected {
-		t.Fatalf("MMX: expected %v, got %v", expected, got)
-	}
-	t.Log("MMX Support:", got)
-}
-
-// TestMMXext tests MMXext() function
-func TestMMXext(t *testing.T) {
-	got := CPU.MMXExt()
-	expected := CPU.Features&MMXEXT == MMXEXT
-	if got != expected {
-		t.Fatalf("MMXExt: expected %v, got %v", expected, got)
-	}
-	t.Log("MMXEXT Support:", got)
-}
-
-// TestSSE tests SSE() function
-func TestSSE(t *testing.T) {
-	got := CPU.SSE()
-	expected := CPU.Features&SSE == SSE
-	if got != expected {
-		t.Fatalf("SSE: expected %v, got %v", expected, got)
-	}
-	t.Log("SSE Support:", got)
-}
-
-// TestSSE2 tests SSE2() function
-func TestSSE2(t *testing.T) {
-	got := CPU.SSE2()
-	expected := CPU.Features&SSE2 == SSE2
-	if got != expected {
-		t.Fatalf("SSE2: expected %v, got %v", expected, got)
-	}
-	t.Log("SSE2 Support:", got)
-}
-
-// TestSSE3 tests SSE3() function
-func TestSSE3(t *testing.T) {
-	got := CPU.SSE3()
-	expected := CPU.Features&SSE3 == SSE3
-	if got != expected {
-		t.Fatalf("SSE3: expected %v, got %v", expected, got)
-	}
-	t.Log("SSE3 Support:", got)
-}
-
-// TestSSSE3 tests SSSE3() function
-func TestSSSE3(t *testing.T) {
-	got := CPU.SSSE3()
-	expected := CPU.Features&SSSE3 == SSSE3
-	if got != expected {
-		t.Fatalf("SSSE3: expected %v, got %v", expected, got)
-	}
-	t.Log("SSSE3 Support:", got)
-}
-
-// TestSSE4 tests SSE4() function
-func TestSSE4(t *testing.T) {
-	got := CPU.SSE4()
-	expected := CPU.Features&SSE4 == SSE4
-	if got != expected {
-		t.Fatalf("SSE4: expected %v, got %v", expected, got)
-	}
-	t.Log("SSE4 Support:", got)
-}
-
-// TestSSE42 tests SSE42() function
-func TestSSE42(t *testing.T) {
-	got := CPU.SSE42()
-	expected := CPU.Features&SSE42 == SSE42
-	if got != expected {
-		t.Fatalf("SSE42: expected %v, got %v", expected, got)
-	}
-	t.Log("SSE42 Support:", got)
-}
-
-// TestAVX tests AVX() function
-func TestAVX(t *testing.T) {
-	got := CPU.AVX()
-	expected := CPU.Features&AVX == AVX
-	if got != expected {
-		t.Fatalf("AVX: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX Support:", got)
-}
-
-// TestAVX2 tests AVX2() function
-func TestAVX2(t *testing.T) {
-	got := CPU.AVX2()
-	expected := CPU.Features&AVX2 == AVX2
-	if got != expected {
-		t.Fatalf("AVX2: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX2 Support:", got)
-}
-
-// TestFMA3 tests FMA3() function
-func TestFMA3(t *testing.T) {
-	got := CPU.FMA3()
-	expected := CPU.Features&FMA3 == FMA3
-	if got != expected {
-		t.Fatalf("FMA3: expected %v, got %v", expected, got)
-	}
-	t.Log("FMA3 Support:", got)
-}
-
-// TestFMA4 tests FMA4() function
-func TestFMA4(t *testing.T) {
-	got := CPU.FMA4()
-	expected := CPU.Features&FMA4 == FMA4
-	if got != expected {
-		t.Fatalf("FMA4: expected %v, got %v", expected, got)
-	}
-	t.Log("FMA4 Support:", got)
-}
-
-// TestXOP tests XOP() function
-func TestXOP(t *testing.T) {
-	got := CPU.XOP()
-	expected := CPU.Features&XOP == XOP
-	if got != expected {
-		t.Fatalf("XOP: expected %v, got %v", expected, got)
-	}
-	t.Log("XOP Support:", got)
-}
-
-// TestF16C tests F16C() function
-func TestF16C(t *testing.T) {
-	got := CPU.F16C()
-	expected := CPU.Features&F16C == F16C
-	if got != expected {
-		t.Fatalf("F16C: expected %v, got %v", expected, got)
-	}
-	t.Log("F16C Support:", got)
-}
-
-// TestCX16 tests CX16() function
-func TestCX16(t *testing.T) {
-	got := CPU.CX16()
-	expected := CPU.Features&CX16 == CX16
-	if got != expected {
-		t.Fatalf("CX16: expected %v, got %v", expected, got)
-	}
-	t.Log("CX16 Support:", got)
-}
-
 // TestSGX tests SGX detection
 func TestSGX(t *testing.T) {
 	got := CPU.SGX.Available
-	expected := CPU.Features&SGX == SGX
+	expected := CPU.featureSet.inSet(SGX)
 	if got != expected {
 		t.Fatalf("SGX: expected %v, got %v", expected, got)
 	}
@@ -300,484 +136,59 @@ func TestSGX(t *testing.T) {
 	}
 }
 
+// TestHas checks that Has and Supports track Disable/Enable for every detected feature.
+func TestHas(t *testing.T) {
+	Detect()
+	defer Detect() // run detection again once the test has finished toggling features
+	for _, feat := range CPU.FeatureSet() {
+		f := ParseFeature(feat)
+		if f == UNKNOWN {
+			t.Error("Got unknown feature:", feat)
+			continue
+		}
+		if !CPU.Has(f) {
+			t.Errorf("%s: CPU.Has returned false, want true", feat)
+		}
+		if !CPU.Supports(f) {
+			t.Errorf("%s: CPU.Supports returned false, want true", feat)
+		}
+		// Disable it; both queries must now report the feature as absent.
+		CPU.Disable(f)
+		if CPU.Has(f) {
+			t.Errorf("%s: after Disable, CPU.Has returned true, want false", feat)
+		}
+		if CPU.Supports(f) {
+			t.Errorf("%s: after Disable, CPU.Supports returned true, want false", feat)
+		}
+		// Re-enable it; both queries must report the feature as present again.
+		CPU.Enable(f)
+		if !CPU.Has(f) {
+			t.Errorf("%s: after Enable, CPU.Has returned false, want true", feat)
+		}
+		if !CPU.Supports(f) {
+			t.Errorf("%s: after Enable, CPU.Supports returned false, want true", feat)
+		}
+	}
+}
+
 // TestSGXLC tests SGX Launch Control detection
 func TestSGXLC(t *testing.T) {
 	got := CPU.SGX.LaunchControl
-	expected := CPU.Features&SGXLC == SGXLC
+	expected := CPU.featureSet.inSet(SGXLC)
 	if got != expected {
 		t.Fatalf("SGX: expected %v, got %v", expected, got)
 	}
 	t.Log("SGX Launch Control Support:", got)
 }
 
-// TestBMI1 tests BMI1() function
-func TestBMI1(t *testing.T) {
-	got := CPU.BMI1()
-	expected := CPU.Features&BMI1 == BMI1
-	if got != expected {
-		t.Fatalf("BMI1: expected %v, got %v", expected, got)
-	}
-	t.Log("BMI1 Support:", got)
-}
-
-// TestBMI2 tests BMI2() function
-func TestBMI2(t *testing.T) {
-	got := CPU.BMI2()
-	expected := CPU.Features&BMI2 == BMI2
-	if got != expected {
-		t.Fatalf("BMI2: expected %v, got %v", expected, got)
-	}
-	t.Log("BMI2 Support:", got)
-}
-
-// TestTBM tests TBM() function
-func TestTBM(t *testing.T) {
-	got := CPU.TBM()
-	expected := CPU.Features&TBM == TBM
-	if got != expected {
-		t.Fatalf("TBM: expected %v, got %v", expected, got)
-	}
-	t.Log("TBM Support:", got)
-}
-
-// TestLzcnt tests Lzcnt() function
-func TestLzcnt(t *testing.T) {
-	got := CPU.Lzcnt()
-	expected := CPU.Features&LZCNT == LZCNT
-	if got != expected {
-		t.Fatalf("Lzcnt: expected %v, got %v", expected, got)
-	}
-	t.Log("LZCNT Support:", got)
-}
-
-// TestLzcnt tests Lzcnt() function
-func TestPopcnt(t *testing.T) {
-	got := CPU.Popcnt()
-	expected := CPU.Features&POPCNT == POPCNT
-	if got != expected {
-		t.Fatalf("Popcnt: expected %v, got %v", expected, got)
-	}
-	t.Log("POPCNT Support:", got)
-}
-
-// TestAesNi tests AesNi() function
-func TestAesNi(t *testing.T) {
-	got := CPU.AesNi()
-	expected := CPU.Features&AESNI == AESNI
-	if got != expected {
-		t.Fatalf("AesNi: expected %v, got %v", expected, got)
-	}
-	t.Log("AESNI Support:", got)
-}
-
-// TestHTT tests HTT() function
-func TestHTT(t *testing.T) {
-	got := CPU.HTT()
-	expected := CPU.Features&HTT == HTT
-	if got != expected {
-		t.Fatalf("HTT: expected %v, got %v", expected, got)
-	}
-	t.Log("HTT Support:", got)
-}
-
-// TestClmul tests Clmul() function
-func TestClmul(t *testing.T) {
-	got := CPU.Clmul()
-	expected := CPU.Features&CLMUL == CLMUL
-	if got != expected {
-		t.Fatalf("Clmul: expected %v, got %v", expected, got)
-	}
-	t.Log("CLMUL Support:", got)
-}
-
-// TestSSE2Slow tests SSE2Slow() function
-func TestSSE2Slow(t *testing.T) {
-	got := CPU.SSE2Slow()
-	expected := CPU.Features&SSE2SLOW == SSE2SLOW
-	if got != expected {
-		t.Fatalf("SSE2Slow: expected %v, got %v", expected, got)
-	}
-	t.Log("SSE2SLOW Support:", got)
-}
-
-// TestSSE3Slow tests SSE3slow() function
-func TestSSE3Slow(t *testing.T) {
-	got := CPU.SSE3Slow()
-	expected := CPU.Features&SSE3SLOW == SSE3SLOW
-	if got != expected {
-		t.Fatalf("SSE3slow: expected %v, got %v", expected, got)
-	}
-	t.Log("SSE3SLOW Support:", got)
-}
-
-// TestAtom tests Atom() function
-func TestAtom(t *testing.T) {
-	got := CPU.Atom()
-	expected := CPU.Features&ATOM == ATOM
-	if got != expected {
-		t.Fatalf("Atom: expected %v, got %v", expected, got)
-	}
-	t.Log("ATOM Support:", got)
-}
-
-// TestNX tests NX() function (NX (No-Execute) bit)
-func TestNX(t *testing.T) {
-	got := CPU.NX()
-	expected := CPU.Features&NX == NX
-	if got != expected {
-		t.Fatalf("NX: expected %v, got %v", expected, got)
-	}
-	t.Log("NX Support:", got)
-}
-
-// TestSSE4A tests SSE4A() function (AMD Barcelona microarchitecture SSE4a instructions)
-func TestSSE4A(t *testing.T) {
-	got := CPU.SSE4A()
-	expected := CPU.Features&SSE4A == SSE4A
-	if got != expected {
-		t.Fatalf("SSE4A: expected %v, got %v", expected, got)
-	}
-	t.Log("SSE4A Support:", got)
-}
-
-// TestHLE tests HLE() function (Hardware Lock Elision)
-func TestHLE(t *testing.T) {
-	got := CPU.HLE()
-	expected := CPU.Features&HLE == HLE
-	if got != expected {
-		t.Fatalf("HLE: expected %v, got %v", expected, got)
-	}
-	t.Log("HLE Support:", got)
-}
-
-// TestRTM tests RTM() function (Restricted Transactional Memory)
-func TestRTM(t *testing.T) {
-	got := CPU.RTM()
-	expected := CPU.Features&RTM == RTM
-	if got != expected {
-		t.Fatalf("RTM: expected %v, got %v", expected, got)
-	}
-	t.Log("RTM Support:", got)
-}
-
-// TestRdrand tests RDRAND() function (RDRAND instruction is available)
-func TestRdrand(t *testing.T) {
-	got := CPU.Rdrand()
-	expected := CPU.Features&RDRAND == RDRAND
-	if got != expected {
-		t.Fatalf("Rdrand: expected %v, got %v", expected, got)
-	}
-	t.Log("Rdrand Support:", got)
-}
-
-// TestRdseed tests RDSEED() function (RDSEED instruction is available)
-func TestRdseed(t *testing.T) {
-	got := CPU.Rdseed()
-	expected := CPU.Features&RDSEED == RDSEED
-	if got != expected {
-		t.Fatalf("Rdseed: expected %v, got %v", expected, got)
-	}
-	t.Log("Rdseed Support:", got)
-}
-
-// TestADX tests ADX() function (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
-func TestADX(t *testing.T) {
-	got := CPU.ADX()
-	expected := CPU.Features&ADX == ADX
-	if got != expected {
-		t.Fatalf("ADX: expected %v, got %v", expected, got)
-	}
-	t.Log("ADX Support:", got)
-}
-
-// TestSHA tests SHA() function (Intel SHA Extensions)
-func TestSHA(t *testing.T) {
-	got := CPU.SHA()
-	expected := CPU.Features&SHA == SHA
-	if got != expected {
-		t.Fatalf("SHA: expected %v, got %v", expected, got)
-	}
-	t.Log("SHA Support:", got)
-}
-
-// TestAVX512F tests AVX512F() function (AVX-512 Foundation)
-func TestAVX512F(t *testing.T) {
-	got := CPU.AVX512F()
-	expected := CPU.Features&AVX512F == AVX512F
-	if got != expected {
-		t.Fatalf("AVX512F: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512F Support:", got)
-}
-
-// TestAVX512DQ tests AVX512DQ() function (AVX-512 Doubleword and Quadword Instructions)
-func TestAVX512DQ(t *testing.T) {
-	got := CPU.AVX512DQ()
-	expected := CPU.Features&AVX512DQ == AVX512DQ
-	if got != expected {
-		t.Fatalf("AVX512DQ: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512DQ Support:", got)
-}
-
-// TestAVX512IFMA tests AVX512IFMA() function (AVX-512 Integer Fused Multiply-Add Instructions)
-func TestAVX512IFMA(t *testing.T) {
-	got := CPU.AVX512IFMA()
-	expected := CPU.Features&AVX512IFMA == AVX512IFMA
-	if got != expected {
-		t.Fatalf("AVX512IFMA: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512IFMA Support:", got)
-}
-
-// TestAVX512PF tests AVX512PF() function (AVX-512 Prefetch Instructions)
-func TestAVX512PF(t *testing.T) {
-	got := CPU.AVX512PF()
-	expected := CPU.Features&AVX512PF == AVX512PF
-	if got != expected {
-		t.Fatalf("AVX512PF: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512PF Support:", got)
-}
-
-// TestAVX512ER tests AVX512ER() function (AVX-512 Exponential and Reciprocal Instructions)
-func TestAVX512ER(t *testing.T) {
-	got := CPU.AVX512ER()
-	expected := CPU.Features&AVX512ER == AVX512ER
-	if got != expected {
-		t.Fatalf("AVX512ER: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512ER Support:", got)
-}
-
-// TestAVX512CD tests AVX512CD() function (AVX-512 Conflict Detection Instructions)
-func TestAVX512CD(t *testing.T) {
-	got := CPU.AVX512CD()
-	expected := CPU.Features&AVX512CD == AVX512CD
-	if got != expected {
-		t.Fatalf("AVX512CD: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512CD Support:", got)
-}
-
-// TestAVX512BW tests AVX512BW() function (AVX-512 Byte and Word Instructions)
-func TestAVX512BW(t *testing.T) {
-	got := CPU.AVX512BW()
-	expected := CPU.Features&AVX512BW == AVX512BW
-	if got != expected {
-		t.Fatalf("AVX512BW: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512BW Support:", got)
-}
-
-// TestAVX512VL tests AVX512VL() function (AVX-512 Vector Length Extensions)
-func TestAVX512VL(t *testing.T) {
-	got := CPU.AVX512VL()
-	expected := CPU.Features&AVX512VL == AVX512VL
-	if got != expected {
-		t.Fatalf("AVX512VL: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512VL Support:", got)
-}
-
-// TestAVX512VBMI tests AVX512VBMI() function (AVX-512 Vector Bit Manipulation Instructions)
-func TestAVX512VBMI(t *testing.T) {
-	got := CPU.AVX512VBMI()
-	expected := CPU.Features&AVX512VBMI == AVX512VBMI
-	if got != expected {
-		t.Fatalf("AVX512VBMI: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512VBMI Support:", got)
-}
-
-// TestAVX512_VBMI2 tests AVX512VBMI2 function (AVX-512 Vector Bit Manipulation Instructions, Version 2)
-func TestAVX512_VBMI2(t *testing.T) {
-	got := CPU.AVX512VBMI2()
-	expected := CPU.Features&AVX512VBMI2 == AVX512VBMI2
-	if got != expected {
-		t.Fatalf("AVX512VBMI2: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512VBMI2 Support:", got)
-}
-
-// TestAVX512_VNNI tests AVX512VNNI() function (AVX-512 Vector Neural Network Instructions)
-func TestAVX512_VNNI(t *testing.T) {
-	got := CPU.AVX512VNNI()
-	expected := CPU.Features&AVX512VNNI == AVX512VNNI
-	if got != expected {
-		t.Fatalf("AVX512VNNI: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512VNNI Support:", got)
-}
-
-// TestAVX512_VPOPCNTDQ tests AVX512VPOPCNTDQ() function (AVX-512 Vector Population Count Doubleword and Quadword)
-func TestAVX512_VPOPCNTDQ(t *testing.T) {
-	got := CPU.AVX512VPOPCNTDQ()
-	expected := CPU.Features&AVX512VPOPCNTDQ == AVX512VPOPCNTDQ
-	if got != expected {
-		t.Fatalf("AVX512VPOPCNTDQ: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512VPOPCNTDQ Support:", got)
-}
-
-// TestGFNI tests GFNI() function (Galois Field New Instructions)
-func TestGFNI(t *testing.T) {
-	got := CPU.GFNI()
-	expected := CPU.Features&GFNI == GFNI
-	if got != expected {
-		t.Fatalf("GFNI: expected %v, got %v", expected, got)
-	}
-	t.Log("GFNI Support:", got)
-}
-
-// TestVAES tests VAES() function (Vector AES)
-func TestVAES(t *testing.T) {
-	got := CPU.VAES()
-	expected := CPU.Features&VAES == VAES
-	if got != expected {
-		t.Fatalf("VAES: expected %v, got %v", expected, got)
-	}
-	t.Log("VAES Support:", got)
-}
-
-// TestAVX512_BITALG tests AVX512BITALG() function (AVX-512 Bit Algorithms)
-func TestAVX512_BITALG(t *testing.T) {
-	got := CPU.AVX512BITALG()
-	expected := CPU.Features&AVX512BITALG == AVX512BITALG
-	if got != expected {
-		t.Fatalf("AVX512BITALG: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512BITALG Support:", got)
-}
-
-// TestVPCLMULQDQ tests VPCLMULQDQ() function (Carry-Less Multiplication Quadword)
-func TestVPCLMULQDQ(t *testing.T) {
-	got := CPU.VPCLMULQDQ()
-	expected := CPU.Features&VPCLMULQDQ == VPCLMULQDQ
-	if got != expected {
-		t.Fatalf("VPCLMULQDQ: expected %v, got %v", expected, got)
-	}
-	t.Log("VPCLMULQDQ Support:", got)
-}
-
-// TestAVX512_BF16 tests AVX512BF16() function (AVX-512 BFLOAT16 Instructions)
-func TestAVX512_BF16(t *testing.T) {
-	got := CPU.AVX512BF16()
-	expected := CPU.Features&AVX512BF16 == AVX512BF16
-	if got != expected {
-		t.Fatalf("AVX512BF16: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512BF16 Support:", got)
-}
-
-// TestAVX512_VP2INTERSECT tests AVX512VP2INTERSECT() function (AVX-512 Intersect for D/Q)
-func TestAVX512_VP2INTERSECT(t *testing.T) {
-	got := CPU.AVX512VP2INTERSECT()
-	expected := CPU.Features&AVX512VP2INTERSECT == AVX512VP2INTERSECT
-	if got != expected {
-		t.Fatalf("AVX512VP2INTERSECT: expected %v, got %v", expected, got)
-	}
-	t.Log("AVX512VP2INTERSECT Support:", got)
-}
-
-// TestMPX tests MPX() function (Intel MPX (Memory Protection Extensions))
-func TestMPX(t *testing.T) {
-	got := CPU.MPX()
-	expected := CPU.Features&MPX == MPX
-	if got != expected {
-		t.Fatalf("MPX: expected %v, got %v", expected, got)
-	}
-	t.Log("MPX Support:", got)
-}
-
-// TestERMS tests ERMS() function (Enhanced REP MOVSB/STOSB)
-func TestERMS(t *testing.T) {
-	got := CPU.ERMS()
-	expected := CPU.Features&ERMS == ERMS
-	if got != expected {
-		t.Fatalf("ERMS: expected %v, got %v", expected, got)
-	}
-	t.Log("ERMS Support:", got)
-}
-
-// TestVendor writes the detected vendor. Will be 0 if unknown
-func TestVendor(t *testing.T) {
-	t.Log("Vendor ID:", CPU.VendorID)
-}
-
-// Intel returns true if vendor is recognized as Intel
-func TestIntel(t *testing.T) {
-	got := CPU.Intel()
-	expected := CPU.VendorID == Intel
-	if got != expected {
-		t.Fatalf("TestIntel: expected %v, got %v", expected, got)
-	}
-	t.Log("TestIntel:", got)
-}
-
-// AMD returns true if vendor is recognized as AMD
-func TestAMD(t *testing.T) {
-	got := CPU.AMD()
-	expected := CPU.VendorID == AMD
-	if got != expected {
-		t.Fatalf("TestAMD: expected %v, got %v", expected, got)
-	}
-	t.Log("TestAMD:", got)
-}
-
-// Hygon returns true if vendor is recognized as Hygon
-func TestHygon(t *testing.T) {
-	got := CPU.Hygon()
-	expected := CPU.VendorID == Hygon
-	if got != expected {
-		t.Fatalf("TestHygon: expected %v, got %v", expected, got)
-	}
-	t.Log("TestHygon:", got)
-}
-
-// Transmeta returns true if vendor is recognized as Transmeta
-func TestTransmeta(t *testing.T) {
-	got := CPU.Transmeta()
-	expected := CPU.VendorID == Transmeta
-	if got != expected {
-		t.Fatalf("TestTransmeta: expected %v, got %v", expected, got)
-	}
-	t.Log("TestTransmeta:", got)
-}
-
-// NSC returns true if vendor is recognized as National Semiconductor
-func TestNSC(t *testing.T) {
-	got := CPU.NSC()
-	expected := CPU.VendorID == NSC
-	if got != expected {
-		t.Fatalf("TestNSC: expected %v, got %v", expected, got)
-	}
-	t.Log("TestNSC:", got)
-}
-
-// VIA returns true if vendor is recognized as VIA
-func TestVIA(t *testing.T) {
-	got := CPU.VIA()
-	expected := CPU.VendorID == VIA
-	if got != expected {
-		t.Fatalf("TestVIA: expected %v, got %v", expected, got)
-	}
-	t.Log("TestVIA:", got)
-}
-
 // Test VM function
 func TestVM(t *testing.T) {
-	t.Log("Vendor ID:", CPU.VM())
-}
-
-// TSX returns true if cpu supports transactional sync extensions.
-func TestCPUInfo_TSX(t *testing.T) {
-	got := CPU.TSX()
-	expected := CPU.HLE() && CPU.RTM()
+	got := CPU.VM()
+	expected := CPU.featureSet.inSet(HYPERVISOR)
 	if got != expected {
-		t.Fatalf("TestCPUInfo_TSX: expected %v, got %v", expected, got)
+		t.Fatalf("TestVM: expected %v, got %v", expected, got)
 	}
-	t.Log("TestCPUInfo_TSX:", got)
+	t.Log("TestVM:", got)
 }
 
 // Test RTCounter function
@@ -832,42 +243,3 @@ func ExampleCPUInfo_Ia32TscAux() {
 	core := ecx & 0xFFF
 	fmt.Println("Chip, Core:", chip, core)
 }
-
-/*
-func TestPhysical(t *testing.T) {
-	var test16 = "CPUID 00000000: 0000000d-756e6547-6c65746e-49656e69 \nCPUID 00000001: 000206d7-03200800-1fbee3ff-bfebfbff   \nCPUID 00000002: 76035a01-00f0b2ff-00000000-00ca0000   \nCPUID 00000003: 00000000-00000000-00000000-00000000   \nCPUID 00000004: 3c004121-01c0003f-0000003f-00000000   \nCPUID 00000004: 3c004122-01c0003f-0000003f-00000000   \nCPUID 00000004: 3c004143-01c0003f-000001ff-00000000   \nCPUID 00000004: 3c07c163-04c0003f-00003fff-00000006   \nCPUID 00000005: 00000040-00000040-00000003-00021120   \nCPUID 00000006: 00000075-00000002-00000009-00000000   \nCPUID 00000007: 00000000-00000000-00000000-00000000   \nCPUID 00000008: 00000000-00000000-00000000-00000000   \nCPUID 00000009: 00000001-00000000-00000000-00000000   \nCPUID 0000000a: 07300403-00000000-00000000-00000603   \nCPUID 0000000b: 00000000-00000000-00000003-00000003   \nCPUID 0000000b: 00000005-00000010-00000201-00000003   \nCPUID 0000000c: 00000000-00000000-00000000-00000000   \nCPUID 0000000d: 00000007-00000340-00000340-00000000   \nCPUID 0000000d: 00000001-00000000-00000000-00000000   \nCPUID 0000000d: 00000100-00000240-00000000-00000000   \nCPUID 80000000: 80000008-00000000-00000000-00000000   \nCPUID 80000001: 00000000-00000000-00000001-2c100800   \nCPUID 80000002: 20202020-49202020-6c65746e-20295228   \nCPUID 80000003: 6e6f6558-20295228-20555043-322d3545   \nCPUID 80000004: 20303636-20402030-30322e32-007a4847   \nCPUID 80000005: 00000000-00000000-00000000-00000000   \nCPUID 80000006: 00000000-00000000-01006040-00000000   \nCPUID 80000007: 00000000-00000000-00000000-00000100   \nCPUID 80000008: 0000302e-00000000-00000000-00000000"
-	restore := mockCPU([]byte(test16))
-	Detect()
-	t.Log("Name:", CPU.BrandName)
-	n := maxFunctionID()
-	t.Logf("Max Function:0x%x\n", n)
-	n = maxExtendedFunction()
-	t.Logf("Max Extended Function:0x%x\n", n)
-	t.Log("PhysicalCores:", CPU.PhysicalCores)
-	t.Log("ThreadsPerCore:", CPU.ThreadsPerCore)
-	t.Log("LogicalCores:", CPU.LogicalCores)
-	t.Log("Family", CPU.Family, "Model:", CPU.Model)
-	t.Log("Features:", CPU.Features)
-	t.Log("Cacheline bytes:", CPU.CacheLine)
-	t.Log("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
-	t.Log("L1 Data Cache:", CPU.Cache.L1D, "bytes")
-	t.Log("L2 Cache:", CPU.Cache.L2, "bytes")
-	t.Log("L3 Cache:", CPU.Cache.L3, "bytes")
-	if CPU.LogicalCores > 0 && CPU.PhysicalCores > 0 {
-		if CPU.LogicalCores != CPU.PhysicalCores*CPU.ThreadsPerCore {
-			t.Fatalf("Core count mismatch, LogicalCores (%d) != PhysicalCores (%d) * CPU.ThreadsPerCore (%d)",
-				CPU.LogicalCores, CPU.PhysicalCores, CPU.ThreadsPerCore)
-		}
-	}
-
-	if CPU.ThreadsPerCore > 1 && !CPU.HTT() {
-		t.Fatalf("Hyperthreading not detected")
-	}
-	if CPU.ThreadsPerCore == 1 && CPU.HTT() {
-		t.Fatalf("Hyperthreading detected, but only 1 Thread per core")
-	}
-	restore()
-	Detect()
-	TestCPUID(t)
-}
-*/
diff --git a/detect_arm64.go b/detect_arm64.go
index 923a826..9a53504 100644
--- a/detect_arm64.go
+++ b/detect_arm64.go
@@ -1,9 +1,12 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build arm64,!gccgo,!noasm,!appengine
+//go:build arm64 && !gccgo && !noasm && !appengine
+// +build arm64,!gccgo,!noasm,!appengine
 
 package cpuid
 
+import "runtime"
+
 func getMidr() (midr uint64)
 func getProcFeatures() (procFeatures uint64)
 func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
@@ -15,14 +18,19 @@ func initCPU() {
 	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
 }
 
-func addInfo(c *CPUInfo) {
-	// ARM64 disabled for now.
-	if true {
+func addInfo(c *CPUInfo, safe bool) {
+	// Seems to be safe to assume on ARM64
+	c.CacheLine = 64
+	detectOS(c)
+
+	// ARM64 disabled since it may crash if interrupt is not intercepted by OS.
+	if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" {
 		return
 	}
-	// 	midr := getMidr()
+	midr := getMidr()
 
 	// MIDR_EL1 - Main ID Register
+	// https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
 	//  x--------------------------------------------------x
 	//  | Name                         |  bits   | visible |
 	//  |--------------------------------------------------|
@@ -37,11 +45,70 @@ func addInfo(c *CPUInfo) {
 	//  | Revision                     | [3-0]   |    y    |
 	//  x--------------------------------------------------x
 
-	// 	fmt.Printf(" implementer:  0x%02x\n", (midr>>24)&0xff)
-	// 	fmt.Printf("     variant:   0x%01x\n", (midr>>20)&0xf)
-	// 	fmt.Printf("architecture:   0x%01x\n", (midr>>16)&0xf)
-	// 	fmt.Printf("    part num: 0x%03x\n", (midr>>4)&0xfff)
-	// 	fmt.Printf("    revision:   0x%01x\n", (midr>>0)&0xf)
+	switch (midr >> 24) & 0xff {
+	case 0xC0:
+		c.VendorString = "Ampere Computing"
+		c.VendorID = Ampere
+	case 0x41:
+		c.VendorString = "Arm Limited"
+		c.VendorID = ARM
+	case 0x42:
+		c.VendorString = "Broadcom Corporation"
+		c.VendorID = Broadcom
+	case 0x43:
+		c.VendorString = "Cavium Inc"
+		c.VendorID = Cavium
+	case 0x44:
+		c.VendorString = "Digital Equipment Corporation"
+		c.VendorID = DEC
+	case 0x46:
+		c.VendorString = "Fujitsu Ltd"
+		c.VendorID = Fujitsu
+	case 0x49:
+		c.VendorString = "Infineon Technologies AG"
+		c.VendorID = Infineon
+	case 0x4D:
+		c.VendorString = "Motorola or Freescale Semiconductor Inc"
+		c.VendorID = Motorola
+	case 0x4E:
+		c.VendorString = "NVIDIA Corporation"
+		c.VendorID = NVIDIA
+	case 0x50:
+		c.VendorString = "Applied Micro Circuits Corporation"
+		c.VendorID = AMCC
+	case 0x51:
+		c.VendorString = "Qualcomm Inc"
+		c.VendorID = Qualcomm
+	case 0x56:
+		c.VendorString = "Marvell International Ltd"
+		c.VendorID = Marvell
+	case 0x69:
+		c.VendorString = "Intel Corporation"
+		c.VendorID = Intel
+	}
+
+	// Lower 4 bits: Architecture
+	// Architecture	Meaning
+	// 0b0001		Armv4.
+	// 0b0010		Armv4T.
+	// 0b0011		Armv5 (obsolete).
+	// 0b0100		Armv5T.
+	// 0b0101		Armv5TE.
+	// 0b0110		Armv5TEJ.
+	// 0b0111		Armv6.
+	// 0b1111		Architectural features are individually identified in the ID_* registers, see 'ID registers'.
+	// Upper 4 bits: Variant
+	// An IMPLEMENTATION DEFINED variant number.
+	// Typically, this field is used to distinguish between different product variants, or major revisions of a product.
+	c.Family = int(midr>>16) & 0xff
+
+	// PartNum, bits [15:4]
+	// An IMPLEMENTATION DEFINED primary part number for the device.
+	// On processors implemented by Arm, if the top four bits of the primary
+	// part number are 0x0 or 0x7, the variant and architecture are encoded differently.
+	// Revision, bits [3:0]
+	// An IMPLEMENTATION DEFINED revision number for the device.
+	c.Model = int(midr) & 0xffff
 
 	procFeatures := getProcFeatures()
 
@@ -68,25 +135,18 @@ func addInfo(c *CPUInfo) {
 	// | EL0                          | [3-0]   |    n    |
 	// x--------------------------------------------------x
 
-	var f ArmFlags
+	var f flagSet
 	// if procFeatures&(0xf<<48) != 0 {
 	// 	fmt.Println("DIT")
 	// }
-	if procFeatures&(0xf<<32) != 0 {
-		f |= SVE
-	}
+	f.setIf(procFeatures&(0xf<<32) != 0, SVE)
 	if procFeatures&(0xf<<20) != 15<<20 {
-		f |= ASIMD
-		if procFeatures&(0xf<<20) == 1<<20 {
-			// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
-			// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
-			f |= FPHP
-			f |= ASIMDHP
-		}
-	}
-	if procFeatures&(0xf<<16) != 0 {
-		f |= FP
+		f.set(ASIMD)
+		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
+		// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
+		f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
 	}
+	f.setIf(procFeatures&(0xf<<16) != 0xf<<16, FP) // FP field [19:16]: 0b1111 means floating point is NOT implemented, 0b0000 means it is
 
 	instAttrReg0, instAttrReg1 := getInstAttributes()
 
@@ -127,46 +187,22 @@ func addInfo(c *CPUInfo) {
 	// if instAttrReg0&(0xf<<48) != 0 {
 	// 	fmt.Println("FHM")
 	// }
-	if instAttrReg0&(0xf<<44) != 0 {
-		f |= ASIMDDP
-	}
-	if instAttrReg0&(0xf<<40) != 0 {
-		f |= SM4
-	}
-	if instAttrReg0&(0xf<<36) != 0 {
-		f |= SM3
-	}
-	if instAttrReg0&(0xf<<32) != 0 {
-		f |= SHA3
-	}
-	if instAttrReg0&(0xf<<28) != 0 {
-		f |= ASIMDRDM
-	}
-	if instAttrReg0&(0xf<<20) != 0 {
-		f |= ATOMICS
-	}
-	if instAttrReg0&(0xf<<16) != 0 {
-		f |= CRC32
-	}
-	if instAttrReg0&(0xf<<12) != 0 {
-		f |= SHA2
-	}
-	if instAttrReg0&(0xf<<12) == 2<<12 {
-		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-		// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
-		f |= SHA512
-	}
-	if instAttrReg0&(0xf<<8) != 0 {
-		f |= SHA1
-	}
-	if instAttrReg0&(0xf<<4) != 0 {
-		f |= AES
-	}
-	if instAttrReg0&(0xf<<4) == 2<<4 {
-		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-		// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
-		f |= PMULL
-	}
+	f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
+	f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
+	f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
+	f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
+	f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
+	f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
+	f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
+	f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
+	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+	// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
+	f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
+	f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
+	f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
+	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+	// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
+	f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
 
 	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
 	//
@@ -194,26 +230,18 @@ func addInfo(c *CPUInfo) {
 	// if instAttrReg1&(0xf<<28) != 0 {
 	// 	fmt.Println("GPI")
 	// }
-	if instAttrReg1&(0xf<<28) != 24 {
-		f |= GPA
-	}
-	if instAttrReg1&(0xf<<20) != 0 {
-		f |= LRCPC
-	}
-	if instAttrReg1&(0xf<<16) != 0 {
-		f |= FCMA
-	}
-	if instAttrReg1&(0xf<<12) != 0 {
-		f |= JSCVT
-	}
+	f.setIf(instAttrReg1&(0xf<<24) != 0, GPA) // GPA field is bits [27:24]; bits [31:28] are GPI
+	f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
+	f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
+	f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
 	// if instAttrReg1&(0xf<<8) != 0 {
 	// 	fmt.Println("API")
 	// }
 	// if instAttrReg1&(0xf<<4) != 0 {
 	// 	fmt.Println("APA")
 	// }
-	if instAttrReg1&(0xf<<0) != 0 {
-		f |= DCPOP
-	}
-	c.Arm = f
+	f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
+
+	// Store
+	c.featureSet.or(f)
 }
diff --git a/detect_ref.go b/detect_ref.go
index 970ff3d..9636c2b 100644
--- a/detect_ref.go
+++ b/detect_ref.go
@@ -1,6 +1,7 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build !amd64,!386,!arm64 gccgo noasm appengine
+//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
+// +build !amd64,!386,!arm64 gccgo noasm appengine
 
 package cpuid
 
@@ -11,4 +12,4 @@ func initCPU() {
 	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
 }
 
-func addInfo(info *CPUInfo) {}
+func addInfo(info *CPUInfo, safe bool) {}
diff --git a/detect_intel.go b/detect_x86.go
similarity index 65%
rename from detect_intel.go
rename to detect_x86.go
index 363951b..35678d8 100644
--- a/detect_intel.go
+++ b/detect_x86.go
@@ -1,6 +1,7 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine
+//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
+// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
 
 package cpuid
 
@@ -8,26 +9,28 @@ func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
 func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
 func asmXgetbv(index uint32) (eax, edx uint32)
 func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+func asmDarwinHasAVX512() bool
 
 func initCPU() {
 	cpuid = asmCpuid
 	cpuidex = asmCpuidex
 	xgetbv = asmXgetbv
 	rdtscpAsm = asmRdtscpAsm
+	darwinHasAVX512 = asmDarwinHasAVX512
 }
 
-func addInfo(c *CPUInfo) {
+func addInfo(c *CPUInfo, safe bool) {
 	c.maxFunc = maxFunctionID()
 	c.maxExFunc = maxExtendedFunction()
 	c.BrandName = brandName()
 	c.CacheLine = cacheLine()
 	c.Family, c.Model = familyModel()
-	c.Features = support()
-	c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0)
+	c.featureSet = support()
+	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
 	c.ThreadsPerCore = threadsPerCore()
 	c.LogicalCores = logicalCores()
 	c.PhysicalCores = physicalCores()
 	c.VendorID, c.VendorString = vendorID()
-	c.Hz = hertz(c.BrandName)
 	c.cacheSize()
+	c.frequencies()
 }
diff --git a/featureid_string.go b/featureid_string.go
new file mode 100644
index 0000000..02fe232
--- /dev/null
+++ b/featureid_string.go
@@ -0,0 +1,196 @@
+// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
+
+package cpuid
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[ADX-1]
+	_ = x[AESNI-2]
+	_ = x[AMD3DNOW-3]
+	_ = x[AMD3DNOWEXT-4]
+	_ = x[AMXBF16-5]
+	_ = x[AMXINT8-6]
+	_ = x[AMXTILE-7]
+	_ = x[AVX-8]
+	_ = x[AVX2-9]
+	_ = x[AVX512BF16-10]
+	_ = x[AVX512BITALG-11]
+	_ = x[AVX512BW-12]
+	_ = x[AVX512CD-13]
+	_ = x[AVX512DQ-14]
+	_ = x[AVX512ER-15]
+	_ = x[AVX512F-16]
+	_ = x[AVX512FP16-17]
+	_ = x[AVX512IFMA-18]
+	_ = x[AVX512PF-19]
+	_ = x[AVX512VBMI-20]
+	_ = x[AVX512VBMI2-21]
+	_ = x[AVX512VL-22]
+	_ = x[AVX512VNNI-23]
+	_ = x[AVX512VP2INTERSECT-24]
+	_ = x[AVX512VPOPCNTDQ-25]
+	_ = x[AVXSLOW-26]
+	_ = x[BMI1-27]
+	_ = x[BMI2-28]
+	_ = x[CETIBT-29]
+	_ = x[CETSS-30]
+	_ = x[CLDEMOTE-31]
+	_ = x[CLMUL-32]
+	_ = x[CLZERO-33]
+	_ = x[CMOV-34]
+	_ = x[CMPXCHG8-35]
+	_ = x[CPBOOST-36]
+	_ = x[CX16-37]
+	_ = x[ENQCMD-38]
+	_ = x[ERMS-39]
+	_ = x[F16C-40]
+	_ = x[FMA3-41]
+	_ = x[FMA4-42]
+	_ = x[FXSR-43]
+	_ = x[FXSROPT-44]
+	_ = x[GFNI-45]
+	_ = x[HLE-46]
+	_ = x[HTT-47]
+	_ = x[HWA-48]
+	_ = x[HYPERVISOR-49]
+	_ = x[IBPB-50]
+	_ = x[IBS-51]
+	_ = x[IBSBRNTRGT-52]
+	_ = x[IBSFETCHSAM-53]
+	_ = x[IBSFFV-54]
+	_ = x[IBSOPCNT-55]
+	_ = x[IBSOPCNTEXT-56]
+	_ = x[IBSOPSAM-57]
+	_ = x[IBSRDWROPCNT-58]
+	_ = x[IBSRIPINVALIDCHK-59]
+	_ = x[INT_WBINVD-60]
+	_ = x[INVLPGB-61]
+	_ = x[LAHF-62]
+	_ = x[LZCNT-63]
+	_ = x[MCAOVERFLOW-64]
+	_ = x[MCOMMIT-65]
+	_ = x[MMX-66]
+	_ = x[MMXEXT-67]
+	_ = x[MOVBE-68]
+	_ = x[MOVDIR64B-69]
+	_ = x[MOVDIRI-70]
+	_ = x[MPX-71]
+	_ = x[MSRIRC-72]
+	_ = x[NX-73]
+	_ = x[OSXSAVE-74]
+	_ = x[POPCNT-75]
+	_ = x[RDPRU-76]
+	_ = x[RDRAND-77]
+	_ = x[RDSEED-78]
+	_ = x[RDTSCP-79]
+	_ = x[RTM-80]
+	_ = x[RTM_ALWAYS_ABORT-81]
+	_ = x[SCE-82]
+	_ = x[SERIALIZE-83]
+	_ = x[SGX-84]
+	_ = x[SGXLC-85]
+	_ = x[SHA-86]
+	_ = x[SSE-87]
+	_ = x[SSE2-88]
+	_ = x[SSE3-89]
+	_ = x[SSE4-90]
+	_ = x[SSE42-91]
+	_ = x[SSE4A-92]
+	_ = x[SSSE3-93]
+	_ = x[STIBP-94]
+	_ = x[SUCCOR-95]
+	_ = x[TBM-96]
+	_ = x[TSXLDTRK-97]
+	_ = x[VAES-98]
+	_ = x[VMX-99]
+	_ = x[VPCLMULQDQ-100]
+	_ = x[WAITPKG-101]
+	_ = x[WBNOINVD-102]
+	_ = x[X87-103]
+	_ = x[XOP-104]
+	_ = x[XSAVE-105]
+	_ = x[AESARM-106]
+	_ = x[ARMCPUID-107]
+	_ = x[ASIMD-108]
+	_ = x[ASIMDDP-109]
+	_ = x[ASIMDHP-110]
+	_ = x[ASIMDRDM-111]
+	_ = x[ATOMICS-112]
+	_ = x[CRC32-113]
+	_ = x[DCPOP-114]
+	_ = x[EVTSTRM-115]
+	_ = x[FCMA-116]
+	_ = x[FP-117]
+	_ = x[FPHP-118]
+	_ = x[GPA-119]
+	_ = x[JSCVT-120]
+	_ = x[LRCPC-121]
+	_ = x[PMULL-122]
+	_ = x[SHA1-123]
+	_ = x[SHA2-124]
+	_ = x[SHA3-125]
+	_ = x[SHA512-126]
+	_ = x[SM3-127]
+	_ = x[SM4-128]
+	_ = x[SVE-129]
+	_ = x[lastID-130]
+	_ = x[firstID-0]
+}
+
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKINT_WBINVDINVLPGBLAHFLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMPXMSRIRCNXOSXSAVEPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSCESERIALIZESGXSGXLCSHASSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSUCCORTBMTSXLDTRKVAESVMXVPCLMULQDQWAITPKGWBNOINVDX87XOPXSAVEAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 234, 238, 244, 249, 257, 262, 268, 272, 280, 287, 291, 297, 301, 305, 309, 313, 317, 324, 328, 331, 334, 337, 347, 351, 354, 364, 375, 381, 389, 400, 408, 420, 436, 446, 453, 457, 462, 473, 480, 483, 489, 494, 503, 510, 513, 519, 521, 528, 534, 539, 545, 551, 557, 560, 576, 579, 588, 591, 596, 599, 602, 606, 610, 614, 619, 624, 629, 634, 640, 643, 651, 655, 658, 668, 675, 683, 686, 689, 694, 700, 708, 713, 720, 727, 735, 742, 747, 752, 759, 763, 765, 769, 772, 777, 782, 787, 791, 795, 799, 805, 808, 811, 814, 820}
+
+func (i FeatureID) String() string {
+	if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
+		return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[VendorUnknown-0]
+	_ = x[Intel-1]
+	_ = x[AMD-2]
+	_ = x[VIA-3]
+	_ = x[Transmeta-4]
+	_ = x[NSC-5]
+	_ = x[KVM-6]
+	_ = x[MSVM-7]
+	_ = x[VMware-8]
+	_ = x[XenHVM-9]
+	_ = x[Bhyve-10]
+	_ = x[Hygon-11]
+	_ = x[SiS-12]
+	_ = x[RDC-13]
+	_ = x[Ampere-14]
+	_ = x[ARM-15]
+	_ = x[Broadcom-16]
+	_ = x[Cavium-17]
+	_ = x[DEC-18]
+	_ = x[Fujitsu-19]
+	_ = x[Infineon-20]
+	_ = x[Motorola-21]
+	_ = x[NVIDIA-22]
+	_ = x[AMCC-23]
+	_ = x[Qualcomm-24]
+	_ = x[Marvell-25]
+	_ = x[lastVendor-26]
+}
+
+const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor"
+
+var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155}
+
+func (i Vendor) String() string {
+	if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
+		return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
+}
diff --git a/go.mod b/go.mod
index 55563f2..3ad3f84 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,3 @@
-module github.com/klauspost/cpuid
+module github.com/klauspost/cpuid/v2
 
-go 1.12
+go 1.15
diff --git a/mockcpu_test.go b/mockcpu_test.go
index 8bbc5c7..c44b7fc 100644
--- a/mockcpu_test.go
+++ b/mockcpu_test.go
@@ -4,6 +4,7 @@ import (
 	"archive/zip"
 	"fmt"
 	"io/ioutil"
+	"math"
 	"sort"
 	"strings"
 	"testing"
@@ -44,9 +45,9 @@ func mockCPU(def []byte) func() {
 				break
 			}
 		}
-		if !strings.Contains(line, "-") {
-			//continue
-		}
+		//if !strings.Contains(line, "-") {
+		//	continue
+		//}
 		items := strings.Split(line, ":")
 		if len(items) < 2 {
 			if len(line) == 51 || len(line) == 50 {
@@ -150,7 +151,7 @@ func mockCPU(def []byte) func() {
 			panic(fmt.Sprintf("XGETBV not supported %v", fakeID))
 		}
 		// We don't have any data to return, unfortunately
-		return 0, 0
+		return math.MaxUint32, math.MaxUint32
 	}
 	return restorer
 }
@@ -185,13 +186,15 @@ func TestMocks(t *testing.T) {
 		t.Log("ThreadsPerCore:", CPU.ThreadsPerCore)
 		t.Log("LogicalCores:", CPU.LogicalCores)
 		t.Log("Family", CPU.Family, "Model:", CPU.Model)
-		t.Log("Features:", CPU.Features)
+		t.Log("Features:", strings.Join(CPU.FeatureSet(), ","))
+		t.Log("Microarchitecture level:", CPU.X64Level())
 		t.Log("Cacheline bytes:", CPU.CacheLine)
 		t.Log("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
 		t.Log("L1 Data Cache:", CPU.Cache.L1D, "bytes")
 		t.Log("L2 Cache:", CPU.Cache.L2, "bytes")
 		t.Log("L3 Cache:", CPU.Cache.L3, "bytes")
 		t.Log("Hz:", CPU.Hz, "Hz")
+		t.Log("Boost:", CPU.BoostFreq, "Hz")
 		if CPU.LogicalCores > 0 && CPU.PhysicalCores > 0 {
 			if CPU.LogicalCores != CPU.PhysicalCores*CPU.ThreadsPerCore {
 				t.Fatalf("Core count mismatch, LogicalCores (%d) != PhysicalCores (%d) * CPU.ThreadsPerCore (%d)",
@@ -199,10 +202,10 @@ func TestMocks(t *testing.T) {
 			}
 		}
 
-		if CPU.ThreadsPerCore > 1 && !CPU.HTT() {
+		if CPU.ThreadsPerCore > 1 && !CPU.Supports(HTT) {
 			t.Fatalf("Hyperthreading not detected")
 		}
-		if CPU.ThreadsPerCore == 1 && CPU.HTT() {
+		if CPU.ThreadsPerCore == 1 && CPU.Supports(HTT) {
 			t.Fatalf("Hyperthreading detected, but only 1 Thread per core")
 		}
 		restore()
diff --git a/os_darwin_arm64.go b/os_darwin_arm64.go
new file mode 100644
index 0000000..8d2cb03
--- /dev/null
+++ b/os_darwin_arm64.go
@@ -0,0 +1,19 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+package cpuid
+
+import "runtime"
+
+func detectOS(c *CPUInfo) bool {
+	// Mac OS 11.0 offers no hw.optional sysctl values for probing the features
+	// below at run time, so the set supported by Apple Silicon M1 is assumed
+	// as the baseline for every Go program on darwin/arm64 (skipped on ios).
+	// TODO: Add more if we know them.
+	notIOS := runtime.GOOS != "ios"
+	c.featureSet.setIf(notIOS, AESARM, PMULL, SHA1, SHA2)
+	// One thread per core is assumed for now, so both core counts match.
+	cores := runtime.NumCPU()
+	c.PhysicalCores, c.LogicalCores = cores, cores
+	c.ThreadsPerCore = 1
+	return true
+}
diff --git a/os_linux_arm64.go b/os_linux_arm64.go
new file mode 100644
index 0000000..ee278b9
--- /dev/null
+++ b/os_linux_arm64.go
@@ -0,0 +1,130 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file located
+// here https://github.com/golang/sys/blob/master/LICENSE
+
+package cpuid
+
+import (
+	"encoding/binary"
+	"io/ioutil"
+	"runtime"
+)
+
+// HWCAP bits.
+const (
+	hwcap_FP       = 1 << 0
+	hwcap_ASIMD    = 1 << 1
+	hwcap_EVTSTRM  = 1 << 2
+	hwcap_AES      = 1 << 3
+	hwcap_PMULL    = 1 << 4
+	hwcap_SHA1     = 1 << 5
+	hwcap_SHA2     = 1 << 6
+	hwcap_CRC32    = 1 << 7
+	hwcap_ATOMICS  = 1 << 8
+	hwcap_FPHP     = 1 << 9
+	hwcap_ASIMDHP  = 1 << 10
+	hwcap_CPUID    = 1 << 11
+	hwcap_ASIMDRDM = 1 << 12
+	hwcap_JSCVT    = 1 << 13
+	hwcap_FCMA     = 1 << 14
+	hwcap_LRCPC    = 1 << 15
+	hwcap_DCPOP    = 1 << 16
+	hwcap_SHA3     = 1 << 17
+	hwcap_SM3      = 1 << 18
+	hwcap_SM4      = 1 << 19
+	hwcap_ASIMDDP  = 1 << 20
+	hwcap_SHA512   = 1 << 21
+	hwcap_SVE      = 1 << 22
+	hwcap_ASIMDFHM = 1 << 23
+)
+
+func detectOS(c *CPUInfo) bool {
+	// For now assuming no hyperthreading is reasonable.
+	c.LogicalCores = runtime.NumCPU()
+	c.PhysicalCores = c.LogicalCores
+	c.ThreadsPerCore = 1
+	if hwcap == 0 {
+		// We did not get values from the runtime.
+		// Try reading /proc/self/auxv
+
+		// From https://github.com/golang/sys
+		const (
+			_AT_HWCAP  = 16
+			_AT_HWCAP2 = 26
+
+			uintSize = int(32 << (^uint(0) >> 63))
+		)
+
+		buf, err := ioutil.ReadFile("/proc/self/auxv")
+		if err != nil {
+			// e.g. on android /proc/self/auxv is not accessible, so silently
+			// ignore the error and report that no OS information was found.
+			// (This fallback is adapted from golang/sys; no feature flags
+			// are set on this path.)
+			return false
+		}
+		bo := binary.LittleEndian
+		for len(buf) >= 2*(uintSize/8) {
+			var tag, val uint
+			switch uintSize {
+			case 32:
+				tag = uint(bo.Uint32(buf[0:]))
+				val = uint(bo.Uint32(buf[4:]))
+				buf = buf[8:]
+			case 64:
+				tag = uint(bo.Uint64(buf[0:]))
+				val = uint(bo.Uint64(buf[8:]))
+				buf = buf[16:]
+			}
+			switch tag {
+			case _AT_HWCAP:
+				hwcap = val
+			case _AT_HWCAP2:
+				// Not used
+			}
+		}
+		if hwcap == 0 {
+			return false
+		}
+	}
+
+	// hwcap now holds the HWCAP bits, either provided by the runtime or
+	// read from /proc/self/auxv above. Use them since reading aarch64 system
+	// registers is not supported in user space on older linux kernels.
+	c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM)
+	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD)
+	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM)
+	c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID)
+	c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32)
+	c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM)
+	c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA)
+	c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP)
+	c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT)
+	c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC)
+	c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4)
+	c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE)
+
+	// The Samsung S9+ kernel reports support for atomics, but not all cores
+	// actually support them, resulting in SIGILL. See issue #28431.
+	// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
+	c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS)
+
+	return true
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value > 0 // true when hwc and value share at least one set bit
+}
diff --git a/os_other_arm64.go b/os_other_arm64.go
new file mode 100644
index 0000000..8733ba3
--- /dev/null
+++ b/os_other_arm64.go
@@ -0,0 +1,16 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build arm64 && !linux && !darwin
+// +build arm64,!linux,!darwin
+
+package cpuid
+
+import "runtime"
+
+func detectOS(c *CPUInfo) bool {
+	// No OS-level feature probing here; only the core counts are filled in,
+	// assuming one thread per core for now. Always reports false.
+	cores := runtime.NumCPU()
+	c.PhysicalCores, c.LogicalCores, c.ThreadsPerCore = cores, cores, 1
+	return false
+}
diff --git a/os_safe_linux_arm64.go b/os_safe_linux_arm64.go
new file mode 100644
index 0000000..f8f201b
--- /dev/null
+++ b/os_safe_linux_arm64.go
@@ -0,0 +1,8 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build nounsafe
+// +build nounsafe
+
+package cpuid
+
+var hwcap uint
diff --git a/os_unsafe_linux_arm64.go b/os_unsafe_linux_arm64.go
new file mode 100644
index 0000000..92af622
--- /dev/null
+++ b/os_unsafe_linux_arm64.go
@@ -0,0 +1,11 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build !nounsafe
+// +build !nounsafe
+
+package cpuid
+
+import _ "unsafe" // needed for go:linkname
+
+//go:linkname hwcap internal/cpu.HWCap
+var hwcap uint
diff --git a/private-gen.go b/private-gen.go
deleted file mode 100644
index dfd5669..0000000
--- a/private-gen.go
+++ /dev/null
@@ -1,485 +0,0 @@
-// +build ignore
-
-//go:generate go run private-gen.go
-//go:generate gofmt -w ./private
-
-package main
-
-import (
-	"bytes"
-	"fmt"
-	"go/ast"
-	"go/parser"
-	"go/printer"
-	"go/token"
-	"io"
-	"io/ioutil"
-	"log"
-	"os"
-	"reflect"
-	"strings"
-	"unicode"
-	"unicode/utf8"
-)
-
-var inFiles = []string{"cpuid.go", "cpuid_test.go", "detect_arm64.go", "detect_ref.go", "detect_intel.go"}
-var copyFiles = []string{"cpuid_amd64.s", "cpuid_386.s", "cpuid_arm64.s"}
-var fileSet = token.NewFileSet()
-var reWrites = []rewrite{
-	initRewrite("CPUInfo -> cpuInfo"),
-	initRewrite("Vendor -> vendor"),
-	initRewrite("Flags -> flags"),
-	initRewrite("Detect -> detect"),
-	initRewrite("CPU -> cpu"),
-}
-var excludeNames = map[string]bool{"string": true, "join": true, "trim": true,
-	// cpuid_test.go
-	"t": true, "println": true, "logf": true, "log": true, "fatalf": true, "fatal": true,
-	"maxuint32": true, "lastindex": true,
-}
-
-var excludePrefixes = []string{"test", "benchmark"}
-
-func main() {
-	Package := "private"
-	parserMode := parser.ParseComments
-	exported := make(map[string]rewrite)
-	for _, file := range inFiles {
-		in, err := os.Open(file)
-		if err != nil {
-			log.Fatalf("opening input", err)
-		}
-
-		src, err := ioutil.ReadAll(in)
-		if err != nil {
-			log.Fatalf("reading input", err)
-		}
-
-		astfile, err := parser.ParseFile(fileSet, file, src, parserMode)
-		if err != nil {
-			log.Fatalf("parsing input", err)
-		}
-
-		for _, rw := range reWrites {
-			astfile = rw(astfile)
-		}
-
-		// Inspect the AST and print all identifiers and literals.
-		var startDecl token.Pos
-		var endDecl token.Pos
-		ast.Inspect(astfile, func(n ast.Node) bool {
-			var s string
-			switch x := n.(type) {
-			case *ast.Ident:
-				if x.IsExported() {
-					t := strings.ToLower(x.Name)
-					for _, pre := range excludePrefixes {
-						if strings.HasPrefix(t, pre) {
-							return true
-						}
-					}
-					if excludeNames[t] != true {
-						//if x.Pos() > startDecl && x.Pos() < endDecl {
-						exported[x.Name] = initRewrite(x.Name + " -> " + t)
-					}
-				}
-
-			case *ast.GenDecl:
-				if x.Tok == token.CONST && x.Lparen > 0 {
-					startDecl = x.Lparen
-					endDecl = x.Rparen
-					// fmt.Printf("Decl:%s -> %s\n", fileSet.Position(startDecl), fileSet.Position(endDecl))
-				}
-			}
-			if s != "" {
-				fmt.Printf("%s:\t%s\n", fileSet.Position(n.Pos()), s)
-			}
-			return true
-		})
-
-		for _, rw := range exported {
-			astfile = rw(astfile)
-		}
-
-		var buf bytes.Buffer
-
-		printer.Fprint(&buf, fileSet, astfile)
-
-		// Remove package documentation and insert information
-		s := buf.String()
-		ind := strings.Index(buf.String(), "\npackage cpuid")
-		if i := strings.Index(buf.String(), "\n//+build "); i > 0 {
-			ind = i
-		}
-		s = s[ind:]
-		s = "// Generated, DO NOT EDIT,\n" +
-			"// but copy it to your own project and rename the package.\n" +
-			"// See more at http://github.com/klauspost/cpuid\n" +
-			s
-		if !strings.HasPrefix(file, "cpuid") {
-			file = "cpuid_" + file
-		}
-		outputName := Package + string(os.PathSeparator) + file
-
-		err = ioutil.WriteFile(outputName, []byte(s), 0644)
-		if err != nil {
-			log.Fatalf("writing output: %s", err)
-		}
-		log.Println("Generated", outputName)
-	}
-
-	for _, file := range copyFiles {
-		dst := ""
-		if strings.HasPrefix(file, "cpuid") {
-			dst = Package + string(os.PathSeparator) + file
-		} else {
-			dst = Package + string(os.PathSeparator) + "cpuid_" + file
-		}
-		err := copyFile(file, dst)
-		if err != nil {
-			log.Fatalf("copying file: %s", err)
-		}
-		log.Println("Copied", dst)
-	}
-}
-
-// CopyFile copies a file from src to dst. If src and dst files exist, and are
-// the same, then return success. Copy the file contents from src to dst.
-func copyFile(src, dst string) (err error) {
-	sfi, err := os.Stat(src)
-	if err != nil {
-		return
-	}
-	if !sfi.Mode().IsRegular() {
-		// cannot copy non-regular files (e.g., directories,
-		// symlinks, devices, etc.)
-		return fmt.Errorf("CopyFile: non-regular source file %s (%q)", sfi.Name(), sfi.Mode().String())
-	}
-	dfi, err := os.Stat(dst)
-	if err != nil {
-		if !os.IsNotExist(err) {
-			return
-		}
-	} else {
-		if !(dfi.Mode().IsRegular()) {
-			return fmt.Errorf("CopyFile: non-regular destination file %s (%q)", dfi.Name(), dfi.Mode().String())
-		}
-		if os.SameFile(sfi, dfi) {
-			return
-		}
-	}
-	err = copyFileContents(src, dst)
-	return
-}
-
-// copyFileContents copies the contents of the file named src to the file named
-// by dst. The file will be created if it does not already exist. If the
-// destination file exists, all it's contents will be replaced by the contents
-// of the source file.
-func copyFileContents(src, dst string) (err error) {
-	in, err := os.Open(src)
-	if err != nil {
-		return
-	}
-	defer in.Close()
-	out, err := os.Create(dst)
-	if err != nil {
-		return
-	}
-	defer func() {
-		cerr := out.Close()
-		if err == nil {
-			err = cerr
-		}
-	}()
-	if _, err = io.Copy(out, in); err != nil {
-		return
-	}
-	err = out.Sync()
-	return
-}
-
-type rewrite func(*ast.File) *ast.File
-
-// Mostly copied from gofmt
-func initRewrite(rewriteRule string) rewrite {
-	f := strings.Split(rewriteRule, "->")
-	if len(f) != 2 {
-		fmt.Fprintf(os.Stderr, "rewrite rule must be of the form 'pattern -> replacement'\n")
-		os.Exit(2)
-	}
-	pattern := parseExpr(f[0], "pattern")
-	replace := parseExpr(f[1], "replacement")
-	return func(p *ast.File) *ast.File { return rewriteFile(pattern, replace, p) }
-}
-
-// parseExpr parses s as an expression.
-// It might make sense to expand this to allow statement patterns,
-// but there are problems with preserving formatting and also
-// with what a wildcard for a statement looks like.
-func parseExpr(s, what string) ast.Expr {
-	x, err := parser.ParseExpr(s)
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "parsing %s %s at %s\n", what, s, err)
-		os.Exit(2)
-	}
-	return x
-}
-
-// Keep this function for debugging.
-/*
-func dump(msg string, val reflect.Value) {
-	fmt.Printf("%s:\n", msg)
-	ast.Print(fileSet, val.Interface())
-	fmt.Println()
-}
-*/
-
-// rewriteFile applies the rewrite rule 'pattern -> replace' to an entire file.
-func rewriteFile(pattern, replace ast.Expr, p *ast.File) *ast.File {
-	cmap := ast.NewCommentMap(fileSet, p, p.Comments)
-	m := make(map[string]reflect.Value)
-	pat := reflect.ValueOf(pattern)
-	repl := reflect.ValueOf(replace)
-
-	var rewriteVal func(val reflect.Value) reflect.Value
-	rewriteVal = func(val reflect.Value) reflect.Value {
-		// don't bother if val is invalid to start with
-		if !val.IsValid() {
-			return reflect.Value{}
-		}
-		for k := range m {
-			delete(m, k)
-		}
-		val = apply(rewriteVal, val)
-		if match(m, pat, val) {
-			val = subst(m, repl, reflect.ValueOf(val.Interface().(ast.Node).Pos()))
-		}
-		return val
-	}
-
-	r := apply(rewriteVal, reflect.ValueOf(p)).Interface().(*ast.File)
-	r.Comments = cmap.Filter(r).Comments() // recreate comments list
-	return r
-}
-
-// set is a wrapper for x.Set(y); it protects the caller from panics if x cannot be changed to y.
-func set(x, y reflect.Value) {
-	// don't bother if x cannot be set or y is invalid
-	if !x.CanSet() || !y.IsValid() {
-		return
-	}
-	defer func() {
-		if x := recover(); x != nil {
-			if s, ok := x.(string); ok &&
-				(strings.Contains(s, "type mismatch") || strings.Contains(s, "not assignable")) {
-				// x cannot be set to y - ignore this rewrite
-				return
-			}
-			panic(x)
-		}
-	}()
-	x.Set(y)
-}
-
-// Values/types for special cases.
-var (
-	objectPtrNil = reflect.ValueOf((*ast.Object)(nil))
-	scopePtrNil  = reflect.ValueOf((*ast.Scope)(nil))
-
-	identType     = reflect.TypeOf((*ast.Ident)(nil))
-	objectPtrType = reflect.TypeOf((*ast.Object)(nil))
-	positionType  = reflect.TypeOf(token.NoPos)
-	callExprType  = reflect.TypeOf((*ast.CallExpr)(nil))
-	scopePtrType  = reflect.TypeOf((*ast.Scope)(nil))
-)
-
-// apply replaces each AST field x in val with f(x), returning val.
-// To avoid extra conversions, f operates on the reflect.Value form.
-func apply(f func(reflect.Value) reflect.Value, val reflect.Value) reflect.Value {
-	if !val.IsValid() {
-		return reflect.Value{}
-	}
-
-	// *ast.Objects introduce cycles and are likely incorrect after
-	// rewrite; don't follow them but replace with nil instead
-	if val.Type() == objectPtrType {
-		return objectPtrNil
-	}
-
-	// similarly for scopes: they are likely incorrect after a rewrite;
-	// replace them with nil
-	if val.Type() == scopePtrType {
-		return scopePtrNil
-	}
-
-	switch v := reflect.Indirect(val); v.Kind() {
-	case reflect.Slice:
-		for i := 0; i < v.Len(); i++ {
-			e := v.Index(i)
-			set(e, f(e))
-		}
-	case reflect.Struct:
-		for i := 0; i < v.NumField(); i++ {
-			e := v.Field(i)
-			set(e, f(e))
-		}
-	case reflect.Interface:
-		e := v.Elem()
-		set(v, f(e))
-	}
-	return val
-}
-
-func isWildcard(s string) bool {
-	rune, size := utf8.DecodeRuneInString(s)
-	return size == len(s) && unicode.IsLower(rune)
-}
-
-// match returns true if pattern matches val,
-// recording wildcard submatches in m.
-// If m == nil, match checks whether pattern == val.
-func match(m map[string]reflect.Value, pattern, val reflect.Value) bool {
-	// Wildcard matches any expression.  If it appears multiple
-	// times in the pattern, it must match the same expression
-	// each time.
-	if m != nil && pattern.IsValid() && pattern.Type() == identType {
-		name := pattern.Interface().(*ast.Ident).Name
-		if isWildcard(name) && val.IsValid() {
-			// wildcards only match valid (non-nil) expressions.
-			if _, ok := val.Interface().(ast.Expr); ok && !val.IsNil() {
-				if old, ok := m[name]; ok {
-					return match(nil, old, val)
-				}
-				m[name] = val
-				return true
-			}
-		}
-	}
-
-	// Otherwise, pattern and val must match recursively.
-	if !pattern.IsValid() || !val.IsValid() {
-		return !pattern.IsValid() && !val.IsValid()
-	}
-	if pattern.Type() != val.Type() {
-		return false
-	}
-
-	// Special cases.
-	switch pattern.Type() {
-	case identType:
-		// For identifiers, only the names need to match
-		// (and none of the other *ast.Object information).
-		// This is a common case, handle it all here instead
-		// of recursing down any further via reflection.
-		p := pattern.Interface().(*ast.Ident)
-		v := val.Interface().(*ast.Ident)
-		return p == nil && v == nil || p != nil && v != nil && p.Name == v.Name
-	case objectPtrType, positionType:
-		// object pointers and token positions always match
-		return true
-	case callExprType:
-		// For calls, the Ellipsis fields (token.Position) must
-		// match since that is how f(x) and f(x...) are different.
-		// Check them here but fall through for the remaining fields.
-		p := pattern.Interface().(*ast.CallExpr)
-		v := val.Interface().(*ast.CallExpr)
-		if p.Ellipsis.IsValid() != v.Ellipsis.IsValid() {
-			return false
-		}
-	}
-
-	p := reflect.Indirect(pattern)
-	v := reflect.Indirect(val)
-	if !p.IsValid() || !v.IsValid() {
-		return !p.IsValid() && !v.IsValid()
-	}
-
-	switch p.Kind() {
-	case reflect.Slice:
-		if p.Len() != v.Len() {
-			return false
-		}
-		for i := 0; i < p.Len(); i++ {
-			if !match(m, p.Index(i), v.Index(i)) {
-				return false
-			}
-		}
-		return true
-
-	case reflect.Struct:
-		for i := 0; i < p.NumField(); i++ {
-			if !match(m, p.Field(i), v.Field(i)) {
-				return false
-			}
-		}
-		return true
-
-	case reflect.Interface:
-		return match(m, p.Elem(), v.Elem())
-	}
-
-	// Handle token integers, etc.
-	return p.Interface() == v.Interface()
-}
-
-// subst returns a copy of pattern with values from m substituted in place
-// of wildcards and pos used as the position of tokens from the pattern.
-// if m == nil, subst returns a copy of pattern and doesn't change the line
-// number information.
-func subst(m map[string]reflect.Value, pattern reflect.Value, pos reflect.Value) reflect.Value {
-	if !pattern.IsValid() {
-		return reflect.Value{}
-	}
-
-	// Wildcard gets replaced with map value.
-	if m != nil && pattern.Type() == identType {
-		name := pattern.Interface().(*ast.Ident).Name
-		if isWildcard(name) {
-			if old, ok := m[name]; ok {
-				return subst(nil, old, reflect.Value{})
-			}
-		}
-	}
-
-	if pos.IsValid() && pattern.Type() == positionType {
-		// use new position only if old position was valid in the first place
-		if old := pattern.Interface().(token.Pos); !old.IsValid() {
-			return pattern
-		}
-		return pos
-	}
-
-	// Otherwise copy.
-	switch p := pattern; p.Kind() {
-	case reflect.Slice:
-		v := reflect.MakeSlice(p.Type(), p.Len(), p.Len())
-		for i := 0; i < p.Len(); i++ {
-			v.Index(i).Set(subst(m, p.Index(i), pos))
-		}
-		return v
-
-	case reflect.Struct:
-		v := reflect.New(p.Type()).Elem()
-		for i := 0; i < p.NumField(); i++ {
-			v.Field(i).Set(subst(m, p.Field(i), pos))
-		}
-		return v
-
-	case reflect.Ptr:
-		v := reflect.New(p.Type()).Elem()
-		if elem := p.Elem(); elem.IsValid() {
-			v.Set(subst(m, elem, pos).Addr())
-		}
-		return v
-
-	case reflect.Interface:
-		v := reflect.New(p.Type()).Elem()
-		if elem := p.Elem(); elem.IsValid() {
-			v.Set(subst(m, elem, pos))
-		}
-		return v
-	}
-
-	return pattern
-}
diff --git a/test-architectures.sh b/test-architectures.sh
new file mode 100755
index 0000000..471d986
--- /dev/null
+++ b/test-architectures.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Build-check the package for every GOOS/GOARCH pair listed by `go tool dist list`, once per build-tag set.
+set -e
+
+go tool dist list | while IFS=/ read -r os arch; do
+    echo "Checking $os/$arch..."
+    echo " normal"
+    GOARCH="$arch" GOOS="$os" go build -o /dev/null .
+    echo " noasm"
+    GOARCH="$arch" GOOS="$os" go build -tags noasm -o /dev/null .
+    echo " appengine"
+    GOARCH="$arch" GOOS="$os" go build -tags appengine -o /dev/null .
+    echo " noasm,appengine"
+    GOARCH="$arch" GOOS="$os" go build -tags 'appengine,noasm' -o /dev/null .
+done
diff --git a/testdata/cpuid_data.zip b/testdata/cpuid_data.zip
index 7975600..2d9a045 100644
Binary files a/testdata/cpuid_data.zip and b/testdata/cpuid_data.zip differ
diff --git a/testdata/getall.go b/testdata/getall.go
index f4d78a6..116456b 100644
--- a/testdata/getall.go
+++ b/testdata/getall.go
@@ -1,14 +1,17 @@
+//go:build ignore
+
 package main
 
 import (
 	"archive/zip"
 	_ "bytes"
 	"fmt"
-	"golang.org/x/net/html"
 	"io"
 	"net/http"
 	"os"
 	"strings"
+
+	"golang.org/x/net/html"
 )
 
 // Download all CPUID dumps from http://users.atw.hu/instlatx64/