@@ -64,6 +64,55 @@ func initOptions() {
6464
6565func archInit () {
6666
67+ const (
68+ // eax bits
69+ cpuid_AVXVNNI = 1 << 4
70+
71+ // ecx bits
72+ cpuid_SSE3 = 1 << 0
73+ cpuid_PCLMULQDQ = 1 << 1
74+ cpuid_AVX512VBMI = 1 << 1
75+ cpuid_AVX512VBMI2 = 1 << 6
76+ cpuid_SSSE3 = 1 << 9
77+ cpuid_AVX512GFNI = 1 << 8
78+ cpuid_AVX512VAES = 1 << 9
79+ cpuid_AVX512VNNI = 1 << 11
80+ cpuid_AVX512BITALG = 1 << 12
81+ cpuid_FMA = 1 << 12
82+ cpuid_AVX512VPOPCNTDQ = 1 << 14
83+ cpuid_SSE41 = 1 << 19
84+ cpuid_SSE42 = 1 << 20
85+ cpuid_POPCNT = 1 << 23
86+ cpuid_AES = 1 << 25
87+ cpuid_OSXSAVE = 1 << 27
88+ cpuid_AVX = 1 << 28
89+
90+ // "Extended Feature Flag" bits returned in EBX for CPUID EAX=0x7 ECX=0x0
91+ cpuid_BMI1 = 1 << 3
92+ cpuid_AVX2 = 1 << 5
93+ cpuid_BMI2 = 1 << 8
94+ cpuid_ERMS = 1 << 9
95+ cpuid_AVX512F = 1 << 16
96+ cpuid_AVX512DQ = 1 << 17
97+ cpuid_ADX = 1 << 19
98+ cpuid_AVX512CD = 1 << 28
99+ cpuid_SHA = 1 << 29
100+ cpuid_AVX512BW = 1 << 30
101+ cpuid_AVX512VL = 1 << 31
102+
103+ // "Extended Feature Flag" bits returned in ECX for CPUID EAX=0x7 ECX=0x0
104+ cpuid_AVX512_VBMI = 1 << 1
105+ cpuid_AVX512_VBMI2 = 1 << 6
106+ cpuid_GFNI = 1 << 8
107+ cpuid_AVX512VPCLMULQDQ = 1 << 10
108+ cpuid_AVX512_BITALG = 1 << 12
109+
110+ // edx bits
111+ cpuid_FSRM = 1 << 4
112+ // edx bits for CPUID 0x80000001
113+ cpuid_RDTSCP = 1 << 27
114+ )
115+
67116 Initialized = true
68117
69118 maxID , _ , _ , _ := cpuid (0 , 0 )
@@ -75,16 +124,16 @@ func archInit() {
75124 _ , _ , ecx1 , edx1 := cpuid (1 , 0 )
76125 X86 .HasSSE2 = isSet (edx1 , 1 << 26 )
77126
78- X86 .HasSSE3 = isSet (ecx1 , 1 << 0 )
79- X86 .HasPCLMULQDQ = isSet (ecx1 , 1 << 1 )
80- X86 .HasSSSE3 = isSet (ecx1 , 1 << 9 )
81- X86 .HasFMA = isSet (ecx1 , 1 << 12 )
127+ X86 .HasSSE3 = isSet (ecx1 , cpuid_SSE3 )
128+ X86 .HasPCLMULQDQ = isSet (ecx1 , cpuid_PCLMULQDQ )
129+ X86 .HasSSSE3 = isSet (ecx1 , cpuid_SSSE3 )
130+ X86 .HasFMA = isSet (ecx1 , cpuid_FMA )
82131 X86 .HasCX16 = isSet (ecx1 , 1 << 13 )
83- X86 .HasSSE41 = isSet (ecx1 , 1 << 19 )
84- X86 .HasSSE42 = isSet (ecx1 , 1 << 20 )
85- X86 .HasPOPCNT = isSet (ecx1 , 1 << 23 )
86- X86 .HasAES = isSet (ecx1 , 1 << 25 )
87- X86 .HasOSXSAVE = isSet (ecx1 , 1 << 27 )
132+ X86 .HasSSE41 = isSet (ecx1 , cpuid_SSE41 )
133+ X86 .HasSSE42 = isSet (ecx1 , cpuid_SSE42 )
134+ X86 .HasPOPCNT = isSet (ecx1 , cpuid_POPCNT )
135+ X86 .HasAES = isSet (ecx1 , cpuid_AES )
136+ X86 .HasOSXSAVE = isSet (ecx1 , cpuid_OSXSAVE )
88137 X86 .HasRDRAND = isSet (ecx1 , 1 << 30 )
89138
90139 var osSupportsAVX , osSupportsAVX512 bool
@@ -103,40 +152,40 @@ func archInit() {
103152 }
104153 }
105154
106- X86 .HasAVX = isSet (ecx1 , 1 << 28 ) && osSupportsAVX
155+ X86 .HasAVX = isSet (ecx1 , cpuid_AVX ) && osSupportsAVX
107156
108157 if maxID < 7 {
109158 return
110159 }
111160
112161 eax7 , ebx7 , ecx7 , edx7 := cpuid (7 , 0 )
113- X86 .HasBMI1 = isSet (ebx7 , 1 << 3 )
114- X86 .HasAVX2 = isSet (ebx7 , 1 << 5 ) && osSupportsAVX
115- X86 .HasBMI2 = isSet (ebx7 , 1 << 8 )
116- X86 .HasERMS = isSet (ebx7 , 1 << 9 )
162+ X86 .HasBMI1 = isSet (ebx7 , cpuid_BMI1 )
163+ X86 .HasAVX2 = isSet (ebx7 , cpuid_AVX2 ) && osSupportsAVX
164+ X86 .HasBMI2 = isSet (ebx7 , cpuid_BMI2 )
165+ X86 .HasERMS = isSet (ebx7 , cpuid_ERMS )
117166 X86 .HasRDSEED = isSet (ebx7 , 1 << 18 )
118- X86 .HasADX = isSet (ebx7 , 1 << 19 )
167+ X86 .HasADX = isSet (ebx7 , cpuid_ADX )
119168
120- X86 .HasAVX512 = isSet (ebx7 , 1 << 16 ) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
169+ X86 .HasAVX512 = isSet (ebx7 , cpuid_AVX512F ) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
121170 if X86 .HasAVX512 {
122171 X86 .HasAVX512F = true
123- X86 .HasAVX512CD = isSet (ebx7 , 1 << 28 )
172+ X86 .HasAVX512CD = isSet (ebx7 , cpuid_AVX512CD )
124173 X86 .HasAVX512ER = isSet (ebx7 , 1 << 27 )
125174 X86 .HasAVX512PF = isSet (ebx7 , 1 << 26 )
126- X86 .HasAVX512VL = isSet (ebx7 , 1 << 31 )
127- X86 .HasAVX512BW = isSet (ebx7 , 1 << 30 )
128- X86 .HasAVX512DQ = isSet (ebx7 , 1 << 17 )
175+ X86 .HasAVX512VL = isSet (ebx7 , cpuid_AVX512VL )
176+ X86 .HasAVX512BW = isSet (ebx7 , cpuid_AVX512BW )
177+ X86 .HasAVX512DQ = isSet (ebx7 , cpuid_AVX512DQ )
129178 X86 .HasAVX512IFMA = isSet (ebx7 , 1 << 21 )
130- X86 .HasAVX512VBMI = isSet (ecx7 , 1 << 1 )
179+ X86 .HasAVX512VBMI = isSet (ecx7 , cpuid_AVX512_VBMI )
131180 X86 .HasAVX5124VNNIW = isSet (edx7 , 1 << 2 )
132181 X86 .HasAVX5124FMAPS = isSet (edx7 , 1 << 3 )
133- X86 .HasAVX512VPOPCNTDQ = isSet (ecx7 , 1 << 14 )
134- X86 .HasAVX512VPCLMULQDQ = isSet (ecx7 , 1 << 10 )
135- X86 .HasAVX512VNNI = isSet (ecx7 , 1 << 11 )
136- X86 .HasAVX512GFNI = isSet (ecx7 , 1 << 8 )
137- X86 .HasAVX512VAES = isSet (ecx7 , 1 << 9 )
138- X86 .HasAVX512VBMI2 = isSet (ecx7 , 1 << 6 )
139- X86 .HasAVX512BITALG = isSet (ecx7 , 1 << 12 )
182+ X86 .HasAVX512VPOPCNTDQ = isSet (ecx7 , cpuid_AVX512VPOPCNTDQ )
183+ X86 .HasAVX512VPCLMULQDQ = isSet (ecx7 , cpuid_AVX512VPCLMULQDQ )
184+ X86 .HasAVX512VNNI = isSet (ecx7 , cpuid_AVX512VNNI )
185+ X86 .HasAVX512GFNI = isSet (ecx7 , cpuid_AVX512GFNI )
186+ X86 .HasAVX512VAES = isSet (ecx7 , cpuid_AVX512VAES )
187+ X86 .HasAVX512VBMI2 = isSet (ecx7 , cpuid_AVX512VBMI2 )
188+ X86 .HasAVX512BITALG = isSet (ecx7 , cpuid_AVX512BITALG )
140189 }
141190
142191 X86 .HasAMXTile = isSet (edx7 , 1 << 24 )
@@ -151,7 +200,7 @@ func archInit() {
151200 }
152201 if X86 .HasAVX {
153202 X86 .HasAVXIFMA = isSet (eax71 , 1 << 23 )
154- X86 .HasAVXVNNI = isSet (eax71 , 1 << 4 )
203+ X86 .HasAVXVNNI = isSet (eax71 , cpuid_AVXVNNI )
155204 X86 .HasAVXVNNIInt8 = isSet (edx71 , 1 << 4 )
156205 }
157206 }
0 commit comments