Correct naming of AVX512 specific test file
frankw
6 years ago
0 | /* | |
1 | * Minio Cloud Storage, (C) 2017 Minio, Inc. | |
2 | * | |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | * you may not use this file except in compliance with the License. | |
5 | * You may obtain a copy of the License at | |
6 | * | |
7 | * http://www.apache.org/licenses/LICENSE-2.0 | |
8 | * | |
9 | * Unless required by applicable law or agreed to in writing, software | |
10 | * distributed under the License is distributed on an "AS IS" BASIS, | |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | * See the License for the specific language governing permissions and | |
13 | * limitations under the License. | |
14 | */ | |
15 | ||
16 | package sha256 | |
17 | ||
18 | import ( | |
19 | "bytes" | |
20 | "encoding/binary" | |
21 | "encoding/hex" | |
22 | "fmt" | |
23 | "hash" | |
24 | "reflect" | |
25 | "sync" | |
26 | "testing" | |
27 | ) | |
28 | ||
29 | func TestGoldenAVX512(t *testing.T) { | |
30 | ||
31 | if !avx512 { | |
32 | t.SkipNow() | |
33 | return | |
34 | } | |
35 | ||
36 | server := NewAvx512Server() | |
37 | h512 := NewAvx512(server) | |
38 | ||
39 | for _, g := range golden { | |
40 | h512.Reset() | |
41 | h512.Write([]byte(g.in)) | |
42 | digest := h512.Sum([]byte{}) | |
43 | s := fmt.Sprintf("%x", digest) | |
44 | if !reflect.DeepEqual(digest, g.out[:]) { | |
45 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) | |
46 | } | |
47 | } | |
48 | } | |
49 | ||
// createInputs returns 16 independently allocated, zero-filled buffers of
// size bytes each, one per lane.
func createInputs(size int) [16][]byte {
	var lanes [16][]byte
	for lane := range lanes {
		lanes[lane] = make([]byte, size)
	}
	return lanes
}
57 | ||
58 | func initDigests() *[512]byte { | |
59 | digests := [512]byte{} | |
60 | for i := 0; i < 16; i++ { | |
61 | binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0) | |
62 | binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1) | |
63 | binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2) | |
64 | binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3) | |
65 | binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4) | |
66 | binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5) | |
67 | binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6) | |
68 | binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7) | |
69 | } | |
70 | return &digests | |
71 | } | |
72 | ||
73 | func testSha256Avx512(t *testing.T, offset, padding int) [16][]byte { | |
74 | ||
75 | if !avx512 { | |
76 | t.SkipNow() | |
77 | return [16][]byte{} | |
78 | } | |
79 | ||
80 | l := uint(len(golden[offset].in)) | |
81 | extraBlock := uint(0) | |
82 | if padding == 0 { | |
83 | extraBlock += 9 | |
84 | } else { | |
85 | extraBlock += 64 | |
86 | } | |
87 | input := createInputs(int(l + extraBlock)) | |
88 | for i := 0; i < 16; i++ { | |
89 | copy(input[i], golden[offset+i].in) | |
90 | input[i][l] = 0x80 | |
91 | copy(input[i][l+1:], bytes.Repeat([]byte{0}, padding)) | |
92 | ||
93 | // Length in bits. | |
94 | len := uint64(l) | |
95 | len <<= 3 | |
96 | for ii := uint(0); ii < 8; ii++ { | |
97 | input[i][l+1+uint(padding)+ii] = byte(len >> (56 - 8*ii)) | |
98 | } | |
99 | } | |
100 | mask := make([]uint64, len(input[0])>>6) | |
101 | for m := range mask { | |
102 | mask[m] = 0xffff | |
103 | } | |
104 | output := blockAvx512(initDigests(), input, mask) | |
105 | for i := 0; i < 16; i++ { | |
106 | if bytes.Compare(output[i][:], golden[offset+i].out[:]) != 0 { | |
107 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[i][:]), hex.EncodeToString(golden[offset+i].out[:])) | |
108 | } | |
109 | } | |
110 | return input | |
111 | } | |
112 | ||
113 | func TestAvx512_1Block(t *testing.T) { testSha256Avx512(t, 31, 0) } | |
114 | func TestAvx512_3Blocks(t *testing.T) { testSha256Avx512(t, 47, 55) } | |
115 | ||
116 | func TestAvx512_MixedBlocks(t *testing.T) { | |
117 | ||
118 | if !avx512 { | |
119 | t.SkipNow() | |
120 | return | |
121 | } | |
122 | ||
123 | inputSingleBlock := testSha256Avx512(t, 31, 0) | |
124 | inputMultiBlock := testSha256Avx512(t, 47, 55) | |
125 | ||
126 | input := [16][]byte{} | |
127 | ||
128 | for i := range input { | |
129 | if i%2 == 0 { | |
130 | input[i] = inputMultiBlock[i] | |
131 | } else { | |
132 | input[i] = inputSingleBlock[i] | |
133 | } | |
134 | } | |
135 | ||
136 | mask := [3]uint64{0xffff, 0x5555, 0x5555} | |
137 | output := blockAvx512(initDigests(), input, mask[:]) | |
138 | var offset int | |
139 | for i := 0; i < len(output); i++ { | |
140 | if i%2 == 0 { | |
141 | offset = 47 | |
142 | } else { | |
143 | offset = 31 | |
144 | } | |
145 | if bytes.Compare(output[i][:], golden[offset+i].out[:]) != 0 { | |
146 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[i][:]), hex.EncodeToString(golden[offset+i].out[:])) | |
147 | } | |
148 | } | |
149 | } | |
150 | ||
151 | func TestAvx512_MixedWithNilBlocks(t *testing.T) { | |
152 | ||
153 | if !avx512 { | |
154 | t.SkipNow() | |
155 | return | |
156 | } | |
157 | ||
158 | inputSingleBlock := testSha256Avx512(t, 31, 0) | |
159 | inputMultiBlock := testSha256Avx512(t, 47, 55) | |
160 | ||
161 | input := [16][]byte{} | |
162 | ||
163 | for i := range input { | |
164 | if i%3 == 0 { | |
165 | input[i] = inputMultiBlock[i] | |
166 | } else if i%3 == 1 { | |
167 | input[i] = inputSingleBlock[i] | |
168 | } else { | |
169 | input[i] = nil | |
170 | } | |
171 | } | |
172 | ||
173 | mask := [3]uint64{0xb6db, 0x9249, 0x9249} | |
174 | output := blockAvx512(initDigests(), input, mask[:]) | |
175 | var offset int | |
176 | for i := 0; i < len(output); i++ { | |
177 | if i%3 == 2 { // for nil inputs | |
178 | initvec := [32]byte{0x6a, 0x09, 0xe6, 0x67, 0xbb, 0x67, 0xae, 0x85, | |
179 | 0x3c, 0x6e, 0xf3, 0x72, 0xa5, 0x4f, 0xf5, 0x3a, | |
180 | 0x51, 0x0e, 0x52, 0x7f, 0x9b, 0x05, 0x68, 0x8c, | |
181 | 0x1f, 0x83, 0xd9, 0xab, 0x5b, 0xe0, 0xcd, 0x19} | |
182 | if bytes.Compare(output[i][:], initvec[:]) != 0 { | |
183 | t.Fatalf("Sum256 function: sha256 for nil vector = %s want %s", hex.EncodeToString(output[i][:]), hex.EncodeToString(initvec[:])) | |
184 | } | |
185 | continue | |
186 | } | |
187 | if i%3 == 0 { | |
188 | offset = 47 | |
189 | } else { | |
190 | offset = 31 | |
191 | } | |
192 | if bytes.Compare(output[i][:], golden[offset+i].out[:]) != 0 { | |
193 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[i][:]), hex.EncodeToString(golden[offset+i].out[:])) | |
194 | } | |
195 | } | |
196 | } | |
197 | ||
198 | func TestAvx512Server(t *testing.T) { | |
199 | ||
200 | if !avx512 { | |
201 | t.SkipNow() | |
202 | return | |
203 | } | |
204 | ||
205 | const offset = 31 + 16 | |
206 | server := NewAvx512Server() | |
207 | ||
208 | // First block of 64 bytes | |
209 | for i := 0; i < 16; i++ { | |
210 | input := make([]byte, 64) | |
211 | copy(input, golden[offset+i].in) | |
212 | server.Write(uint64(Avx512ServerUid+i), input) | |
213 | } | |
214 | ||
215 | // Second block of 64 bytes | |
216 | for i := 0; i < 16; i++ { | |
217 | input := make([]byte, 64) | |
218 | copy(input, golden[offset+i].in[64:]) | |
219 | server.Write(uint64(Avx512ServerUid+i), input) | |
220 | } | |
221 | ||
222 | wg := sync.WaitGroup{} | |
223 | wg.Add(16) | |
224 | ||
225 | // Third and final block | |
226 | for i := 0; i < 16; i++ { | |
227 | input := make([]byte, 64) | |
228 | input[0] = 0x80 | |
229 | copy(input[1:], bytes.Repeat([]byte{0}, 63-8)) | |
230 | ||
231 | // Length in bits. | |
232 | len := uint64(128) | |
233 | len <<= 3 | |
234 | for ii := uint(0); ii < 8; ii++ { | |
235 | input[63-8+1+ii] = byte(len >> (56 - 8*ii)) | |
236 | } | |
237 | go func(i int, uid uint64, input []byte) { | |
238 | output := server.Sum(uid, input) | |
239 | if bytes.Compare(output[:], golden[offset+i].out[:]) != 0 { | |
240 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[:]), hex.EncodeToString(golden[offset+i].out[:])) | |
241 | } | |
242 | wg.Done() | |
243 | }(i, uint64(Avx512ServerUid+i), input) | |
244 | } | |
245 | ||
246 | wg.Wait() | |
247 | } | |
248 | ||
249 | func TestAvx512Digest(t *testing.T) { | |
250 | ||
251 | if !avx512 { | |
252 | t.SkipNow() | |
253 | return | |
254 | } | |
255 | ||
256 | server := NewAvx512Server() | |
257 | ||
258 | const tests = 16 | |
259 | h512 := [16]hash.Hash{} | |
260 | for i := 0; i < tests; i++ { | |
261 | h512[i] = NewAvx512(server) | |
262 | } | |
263 | ||
264 | const offset = 31 + 16 | |
265 | for i := 0; i < tests; i++ { | |
266 | input := make([]byte, 64) | |
267 | copy(input, golden[offset+i].in) | |
268 | h512[i].Write(input) | |
269 | } | |
270 | for i := 0; i < tests; i++ { | |
271 | input := make([]byte, 64) | |
272 | copy(input, golden[offset+i].in[64:]) | |
273 | h512[i].Write(input) | |
274 | } | |
275 | for i := 0; i < tests; i++ { | |
276 | output := h512[i].Sum([]byte{}) | |
277 | if bytes.Compare(output[:], golden[offset+i].out[:]) != 0 { | |
278 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[:]), hex.EncodeToString(golden[offset+i].out[:])) | |
279 | } | |
280 | } | |
281 | } | |
282 | ||
// benchmarkAvx512SingleCore writes body to every hasher in h512 and, once
// all writes have been issued, calls Sum on each of them, discarding the
// results.
func benchmarkAvx512SingleCore(h512 []hash.Hash, body []byte) {
	for _, h := range h512 {
		h.Write(body)
	}
	for _, h := range h512 {
		_ = h.Sum([]byte{})
	}
}
292 | ||
293 | func benchmarkAvx512(b *testing.B, size int) { | |
294 | ||
295 | if !avx512 { | |
296 | b.SkipNow() | |
297 | return | |
298 | } | |
299 | ||
300 | server := NewAvx512Server() | |
301 | ||
302 | const tests = 16 | |
303 | body := make([]byte, size) | |
304 | ||
305 | b.SetBytes(int64(len(body) * tests)) | |
306 | b.ResetTimer() | |
307 | ||
308 | for i := 0; i < b.N; i++ { | |
309 | h512 := make([]hash.Hash, tests) | |
310 | for i := 0; i < tests; i++ { | |
311 | h512[i] = NewAvx512(server) | |
312 | } | |
313 | ||
314 | benchmarkAvx512SingleCore(h512, body) | |
315 | } | |
316 | } | |
317 | ||
318 | func BenchmarkAvx512_05M(b *testing.B) { benchmarkAvx512(b, 512*1024) } | |
319 | func BenchmarkAvx512_1M(b *testing.B) { benchmarkAvx512(b, 1*1024*1024) } | |
320 | func BenchmarkAvx512_5M(b *testing.B) { benchmarkAvx512(b, 5*1024*1024) } | |
321 | func BenchmarkAvx512_10M(b *testing.B) { benchmarkAvx512(b, 10*1024*1024) } | |
322 | ||
323 | func benchmarkAvx512MultiCore(b *testing.B, size, cores int) { | |
324 | ||
325 | if !avx512 { | |
326 | b.SkipNow() | |
327 | return | |
328 | } | |
329 | ||
330 | servers := make([]*Avx512Server, cores) | |
331 | for c := 0; c < cores; c++ { | |
332 | servers[c] = NewAvx512Server() | |
333 | } | |
334 | ||
335 | const tests = 16 | |
336 | ||
337 | body := make([]byte, size) | |
338 | ||
339 | h512 := make([]hash.Hash, tests*cores) | |
340 | for i := 0; i < tests*cores; i++ { | |
341 | h512[i] = NewAvx512(servers[i>>4]) | |
342 | } | |
343 | ||
344 | b.SetBytes(int64(size * 16 * cores)) | |
345 | b.ResetTimer() | |
346 | ||
347 | var wg sync.WaitGroup | |
348 | ||
349 | for i := 0; i < b.N; i++ { | |
350 | wg.Add(cores) | |
351 | for c := 0; c < cores; c++ { | |
352 | go func(c int) { benchmarkAvx512SingleCore(h512[c*tests:(c+1)*tests], body); wg.Done() }(c) | |
353 | } | |
354 | wg.Wait() | |
355 | } | |
356 | } | |
357 | ||
358 | func BenchmarkAvx512_5M_2Cores(b *testing.B) { benchmarkAvx512MultiCore(b, 5*1024*1024, 2) } | |
359 | func BenchmarkAvx512_5M_4Cores(b *testing.B) { benchmarkAvx512MultiCore(b, 5*1024*1024, 4) } | |
360 | func BenchmarkAvx512_5M_6Cores(b *testing.B) { benchmarkAvx512MultiCore(b, 5*1024*1024, 6) } | |
361 | ||
362 | type maskTest struct { | |
363 | in [16]int | |
364 | out [16]maskRounds | |
365 | } | |
366 | ||
367 | var goldenMask = []maskTest{ | |
368 | {[16]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [16]maskRounds{}}, | |
369 | {[16]int{64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0}, [16]maskRounds{{0x5555, 1}}}, | |
370 | {[16]int{0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64}, [16]maskRounds{{0xaaaa, 1}}}, | |
371 | {[16]int{64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64}, [16]maskRounds{{0xffff, 1}}}, | |
372 | {[16]int{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, [16]maskRounds{{0xffff, 2}}}, | |
373 | {[16]int{64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128}, [16]maskRounds{{0xffff, 1}, {0xaaaa, 1}}}, | |
374 | {[16]int{128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64}, [16]maskRounds{{0xffff, 1}, {0x5555, 1}}}, | |
375 | {[16]int{64, 192, 64, 192, 64, 192, 64, 192, 64, 192, 64, 192, 64, 192, 64, 192}, [16]maskRounds{{0xffff, 1}, {0xaaaa, 2}}}, | |
376 | // | |
377 | // >= 64 0110=6 1011=b 1101=d 0110=6 | |
378 | // >=128 0100=4 0010=2 1001=9 0100=4 | |
379 | {[16]int{0, 64, 128, 0, 64, 128, 0, 64, 128, 0, 64, 128, 0, 64, 128, 0}, [16]maskRounds{{0x6db6, 1}, {0x4924, 1}}}, | |
380 | {[16]int{1 * 64, 2 * 64, 3 * 64, 4 * 64, 5 * 64, 6 * 64, 7 * 64, 8 * 64, 9 * 64, 10 * 64, 11 * 64, 12 * 64, 13 * 64, 14 * 64, 15 * 64, 16 * 64}, | |
381 | [16]maskRounds{{0xffff, 1}, {0xfffe, 1}, {0xfffc, 1}, {0xfff8, 1}, {0xfff0, 1}, {0xffe0, 1}, {0xffc0, 1}, {0xff80, 1}, | |
382 | {0xff00, 1}, {0xfe00, 1}, {0xfc00, 1}, {0xf800, 1}, {0xf000, 1}, {0xe000, 1}, {0xc000, 1}, {0x8000, 1}}}, | |
383 | {[16]int{2 * 64, 1 * 64, 3 * 64, 4 * 64, 5 * 64, 6 * 64, 7 * 64, 8 * 64, 9 * 64, 10 * 64, 11 * 64, 12 * 64, 13 * 64, 14 * 64, 15 * 64, 16 * 64}, | |
384 | [16]maskRounds{{0xffff, 1}, {0xfffd, 1}, {0xfffc, 1}, {0xfff8, 1}, {0xfff0, 1}, {0xffe0, 1}, {0xffc0, 1}, {0xff80, 1}, | |
385 | {0xff00, 1}, {0xfe00, 1}, {0xfc00, 1}, {0xf800, 1}, {0xf000, 1}, {0xe000, 1}, {0xc000, 1}, {0x8000, 1}}}, | |
386 | {[16]int{10 * 64, 20 * 64, 30 * 64, 40 * 64, 50 * 64, 60 * 64, 70 * 64, 80 * 64, 90 * 64, 100 * 64, 110 * 64, 120 * 64, 130 * 64, 140 * 64, 150 * 64, 160 * 64}, | |
387 | [16]maskRounds{{0xffff, 10}, {0xfffe, 10}, {0xfffc, 10}, {0xfff8, 10}, {0xfff0, 10}, {0xffe0, 10}, {0xffc0, 10}, {0xff80, 10}, | |
388 | {0xff00, 10}, {0xfe00, 10}, {0xfc00, 10}, {0xf800, 10}, {0xf000, 10}, {0xe000, 10}, {0xc000, 10}, {0x8000, 10}}}, | |
389 | {[16]int{10 * 64, 19 * 64, 27 * 64, 34 * 64, 40 * 64, 45 * 64, 49 * 64, 52 * 64, 54 * 64, 55 * 64, 57 * 64, 60 * 64, 64 * 64, 69 * 64, 75 * 64, 82 * 64}, | |
390 | [16]maskRounds{{0xffff, 10}, {0xfffe, 9}, {0xfffc, 8}, {0xfff8, 7}, {0xfff0, 6}, {0xffe0, 5}, {0xffc0, 4}, {0xff80, 3}, | |
391 | {0xff00, 2}, {0xfe00, 1}, {0xfc00, 2}, {0xf800, 3}, {0xf000, 4}, {0xe000, 5}, {0xc000, 6}, {0x8000, 7}}}, | |
392 | } | |
393 | ||
394 | func TestMaskGen(t *testing.T) { | |
395 | input := [16][]byte{} | |
396 | for gcase, g := range goldenMask { | |
397 | for i, l := range g.in { | |
398 | buf := make([]byte, l) | |
399 | input[i] = buf[:] | |
400 | } | |
401 | ||
402 | mr := genMask(input) | |
403 | ||
404 | if !reflect.DeepEqual(mr, g.out) { | |
405 | t.Fatalf("case %d: got %04x\n want %04x", gcase, mr, g.out) | |
406 | } | |
407 | } | |
408 | } |
0 | /* | |
1 | * Minio Cloud Storage, (C) 2017 Minio, Inc. | |
2 | * | |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | * you may not use this file except in compliance with the License. | |
5 | * You may obtain a copy of the License at | |
6 | * | |
7 | * http://www.apache.org/licenses/LICENSE-2.0 | |
8 | * | |
9 | * Unless required by applicable law or agreed to in writing, software | |
10 | * distributed under the License is distributed on an "AS IS" BASIS, | |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | * See the License for the specific language governing permissions and | |
13 | * limitations under the License. | |
14 | */ | |
15 | ||
16 | // +build amd64 | |
17 | ||
18 | package sha256 | |
19 | ||
20 | import ( | |
21 | "bytes" | |
22 | "encoding/binary" | |
23 | "encoding/hex" | |
24 | "fmt" | |
25 | "hash" | |
26 | "reflect" | |
27 | "sync" | |
28 | "testing" | |
29 | ) | |
30 | ||
31 | func TestGoldenAVX512(t *testing.T) { | |
32 | ||
33 | if !avx512 { | |
34 | t.SkipNow() | |
35 | return | |
36 | } | |
37 | ||
38 | server := NewAvx512Server() | |
39 | h512 := NewAvx512(server) | |
40 | ||
41 | for _, g := range golden { | |
42 | h512.Reset() | |
43 | h512.Write([]byte(g.in)) | |
44 | digest := h512.Sum([]byte{}) | |
45 | s := fmt.Sprintf("%x", digest) | |
46 | if !reflect.DeepEqual(digest, g.out[:]) { | |
47 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) | |
48 | } | |
49 | } | |
50 | } | |
51 | ||
// createInputs returns 16 independently allocated, zero-filled buffers of
// size bytes each, one per lane.
func createInputs(size int) [16][]byte {
	var lanes [16][]byte
	for lane := range lanes {
		lanes[lane] = make([]byte, size)
	}
	return lanes
}
59 | ||
60 | func initDigests() *[512]byte { | |
61 | digests := [512]byte{} | |
62 | for i := 0; i < 16; i++ { | |
63 | binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0) | |
64 | binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1) | |
65 | binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2) | |
66 | binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3) | |
67 | binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4) | |
68 | binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5) | |
69 | binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6) | |
70 | binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7) | |
71 | } | |
72 | return &digests | |
73 | } | |
74 | ||
75 | func testSha256Avx512(t *testing.T, offset, padding int) [16][]byte { | |
76 | ||
77 | if !avx512 { | |
78 | t.SkipNow() | |
79 | return [16][]byte{} | |
80 | } | |
81 | ||
82 | l := uint(len(golden[offset].in)) | |
83 | extraBlock := uint(0) | |
84 | if padding == 0 { | |
85 | extraBlock += 9 | |
86 | } else { | |
87 | extraBlock += 64 | |
88 | } | |
89 | input := createInputs(int(l + extraBlock)) | |
90 | for i := 0; i < 16; i++ { | |
91 | copy(input[i], golden[offset+i].in) | |
92 | input[i][l] = 0x80 | |
93 | copy(input[i][l+1:], bytes.Repeat([]byte{0}, padding)) | |
94 | ||
95 | // Length in bits. | |
96 | len := uint64(l) | |
97 | len <<= 3 | |
98 | for ii := uint(0); ii < 8; ii++ { | |
99 | input[i][l+1+uint(padding)+ii] = byte(len >> (56 - 8*ii)) | |
100 | } | |
101 | } | |
102 | mask := make([]uint64, len(input[0])>>6) | |
103 | for m := range mask { | |
104 | mask[m] = 0xffff | |
105 | } | |
106 | output := blockAvx512(initDigests(), input, mask) | |
107 | for i := 0; i < 16; i++ { | |
108 | if bytes.Compare(output[i][:], golden[offset+i].out[:]) != 0 { | |
109 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[i][:]), hex.EncodeToString(golden[offset+i].out[:])) | |
110 | } | |
111 | } | |
112 | return input | |
113 | } | |
114 | ||
115 | func TestAvx512_1Block(t *testing.T) { testSha256Avx512(t, 31, 0) } | |
116 | func TestAvx512_3Blocks(t *testing.T) { testSha256Avx512(t, 47, 55) } | |
117 | ||
118 | func TestAvx512_MixedBlocks(t *testing.T) { | |
119 | ||
120 | if !avx512 { | |
121 | t.SkipNow() | |
122 | return | |
123 | } | |
124 | ||
125 | inputSingleBlock := testSha256Avx512(t, 31, 0) | |
126 | inputMultiBlock := testSha256Avx512(t, 47, 55) | |
127 | ||
128 | input := [16][]byte{} | |
129 | ||
130 | for i := range input { | |
131 | if i%2 == 0 { | |
132 | input[i] = inputMultiBlock[i] | |
133 | } else { | |
134 | input[i] = inputSingleBlock[i] | |
135 | } | |
136 | } | |
137 | ||
138 | mask := [3]uint64{0xffff, 0x5555, 0x5555} | |
139 | output := blockAvx512(initDigests(), input, mask[:]) | |
140 | var offset int | |
141 | for i := 0; i < len(output); i++ { | |
142 | if i%2 == 0 { | |
143 | offset = 47 | |
144 | } else { | |
145 | offset = 31 | |
146 | } | |
147 | if bytes.Compare(output[i][:], golden[offset+i].out[:]) != 0 { | |
148 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[i][:]), hex.EncodeToString(golden[offset+i].out[:])) | |
149 | } | |
150 | } | |
151 | } | |
152 | ||
153 | func TestAvx512_MixedWithNilBlocks(t *testing.T) { | |
154 | ||
155 | if !avx512 { | |
156 | t.SkipNow() | |
157 | return | |
158 | } | |
159 | ||
160 | inputSingleBlock := testSha256Avx512(t, 31, 0) | |
161 | inputMultiBlock := testSha256Avx512(t, 47, 55) | |
162 | ||
163 | input := [16][]byte{} | |
164 | ||
165 | for i := range input { | |
166 | if i%3 == 0 { | |
167 | input[i] = inputMultiBlock[i] | |
168 | } else if i%3 == 1 { | |
169 | input[i] = inputSingleBlock[i] | |
170 | } else { | |
171 | input[i] = nil | |
172 | } | |
173 | } | |
174 | ||
175 | mask := [3]uint64{0xb6db, 0x9249, 0x9249} | |
176 | output := blockAvx512(initDigests(), input, mask[:]) | |
177 | var offset int | |
178 | for i := 0; i < len(output); i++ { | |
179 | if i%3 == 2 { // for nil inputs | |
180 | initvec := [32]byte{0x6a, 0x09, 0xe6, 0x67, 0xbb, 0x67, 0xae, 0x85, | |
181 | 0x3c, 0x6e, 0xf3, 0x72, 0xa5, 0x4f, 0xf5, 0x3a, | |
182 | 0x51, 0x0e, 0x52, 0x7f, 0x9b, 0x05, 0x68, 0x8c, | |
183 | 0x1f, 0x83, 0xd9, 0xab, 0x5b, 0xe0, 0xcd, 0x19} | |
184 | if bytes.Compare(output[i][:], initvec[:]) != 0 { | |
185 | t.Fatalf("Sum256 function: sha256 for nil vector = %s want %s", hex.EncodeToString(output[i][:]), hex.EncodeToString(initvec[:])) | |
186 | } | |
187 | continue | |
188 | } | |
189 | if i%3 == 0 { | |
190 | offset = 47 | |
191 | } else { | |
192 | offset = 31 | |
193 | } | |
194 | if bytes.Compare(output[i][:], golden[offset+i].out[:]) != 0 { | |
195 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[i][:]), hex.EncodeToString(golden[offset+i].out[:])) | |
196 | } | |
197 | } | |
198 | } | |
199 | ||
200 | func TestAvx512Server(t *testing.T) { | |
201 | ||
202 | if !avx512 { | |
203 | t.SkipNow() | |
204 | return | |
205 | } | |
206 | ||
207 | const offset = 31 + 16 | |
208 | server := NewAvx512Server() | |
209 | ||
210 | // First block of 64 bytes | |
211 | for i := 0; i < 16; i++ { | |
212 | input := make([]byte, 64) | |
213 | copy(input, golden[offset+i].in) | |
214 | server.Write(uint64(Avx512ServerUid+i), input) | |
215 | } | |
216 | ||
217 | // Second block of 64 bytes | |
218 | for i := 0; i < 16; i++ { | |
219 | input := make([]byte, 64) | |
220 | copy(input, golden[offset+i].in[64:]) | |
221 | server.Write(uint64(Avx512ServerUid+i), input) | |
222 | } | |
223 | ||
224 | wg := sync.WaitGroup{} | |
225 | wg.Add(16) | |
226 | ||
227 | // Third and final block | |
228 | for i := 0; i < 16; i++ { | |
229 | input := make([]byte, 64) | |
230 | input[0] = 0x80 | |
231 | copy(input[1:], bytes.Repeat([]byte{0}, 63-8)) | |
232 | ||
233 | // Length in bits. | |
234 | len := uint64(128) | |
235 | len <<= 3 | |
236 | for ii := uint(0); ii < 8; ii++ { | |
237 | input[63-8+1+ii] = byte(len >> (56 - 8*ii)) | |
238 | } | |
239 | go func(i int, uid uint64, input []byte) { | |
240 | output := server.Sum(uid, input) | |
241 | if bytes.Compare(output[:], golden[offset+i].out[:]) != 0 { | |
242 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[:]), hex.EncodeToString(golden[offset+i].out[:])) | |
243 | } | |
244 | wg.Done() | |
245 | }(i, uint64(Avx512ServerUid+i), input) | |
246 | } | |
247 | ||
248 | wg.Wait() | |
249 | } | |
250 | ||
251 | func TestAvx512Digest(t *testing.T) { | |
252 | ||
253 | if !avx512 { | |
254 | t.SkipNow() | |
255 | return | |
256 | } | |
257 | ||
258 | server := NewAvx512Server() | |
259 | ||
260 | const tests = 16 | |
261 | h512 := [16]hash.Hash{} | |
262 | for i := 0; i < tests; i++ { | |
263 | h512[i] = NewAvx512(server) | |
264 | } | |
265 | ||
266 | const offset = 31 + 16 | |
267 | for i := 0; i < tests; i++ { | |
268 | input := make([]byte, 64) | |
269 | copy(input, golden[offset+i].in) | |
270 | h512[i].Write(input) | |
271 | } | |
272 | for i := 0; i < tests; i++ { | |
273 | input := make([]byte, 64) | |
274 | copy(input, golden[offset+i].in[64:]) | |
275 | h512[i].Write(input) | |
276 | } | |
277 | for i := 0; i < tests; i++ { | |
278 | output := h512[i].Sum([]byte{}) | |
279 | if bytes.Compare(output[:], golden[offset+i].out[:]) != 0 { | |
280 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[:]), hex.EncodeToString(golden[offset+i].out[:])) | |
281 | } | |
282 | } | |
283 | } | |
284 | ||
// benchmarkAvx512SingleCore writes body to every hasher in h512 and, once
// all writes have been issued, calls Sum on each of them, discarding the
// results.
func benchmarkAvx512SingleCore(h512 []hash.Hash, body []byte) {
	for _, h := range h512 {
		h.Write(body)
	}
	for _, h := range h512 {
		_ = h.Sum([]byte{})
	}
}
294 | ||
295 | func benchmarkAvx512(b *testing.B, size int) { | |
296 | ||
297 | if !avx512 { | |
298 | b.SkipNow() | |
299 | return | |
300 | } | |
301 | ||
302 | server := NewAvx512Server() | |
303 | ||
304 | const tests = 16 | |
305 | body := make([]byte, size) | |
306 | ||
307 | b.SetBytes(int64(len(body) * tests)) | |
308 | b.ResetTimer() | |
309 | ||
310 | for i := 0; i < b.N; i++ { | |
311 | h512 := make([]hash.Hash, tests) | |
312 | for i := 0; i < tests; i++ { | |
313 | h512[i] = NewAvx512(server) | |
314 | } | |
315 | ||
316 | benchmarkAvx512SingleCore(h512, body) | |
317 | } | |
318 | } | |
319 | ||
320 | func BenchmarkAvx512_05M(b *testing.B) { benchmarkAvx512(b, 512*1024) } | |
321 | func BenchmarkAvx512_1M(b *testing.B) { benchmarkAvx512(b, 1*1024*1024) } | |
322 | func BenchmarkAvx512_5M(b *testing.B) { benchmarkAvx512(b, 5*1024*1024) } | |
323 | func BenchmarkAvx512_10M(b *testing.B) { benchmarkAvx512(b, 10*1024*1024) } | |
324 | ||
325 | func benchmarkAvx512MultiCore(b *testing.B, size, cores int) { | |
326 | ||
327 | if !avx512 { | |
328 | b.SkipNow() | |
329 | return | |
330 | } | |
331 | ||
332 | servers := make([]*Avx512Server, cores) | |
333 | for c := 0; c < cores; c++ { | |
334 | servers[c] = NewAvx512Server() | |
335 | } | |
336 | ||
337 | const tests = 16 | |
338 | ||
339 | body := make([]byte, size) | |
340 | ||
341 | h512 := make([]hash.Hash, tests*cores) | |
342 | for i := 0; i < tests*cores; i++ { | |
343 | h512[i] = NewAvx512(servers[i>>4]) | |
344 | } | |
345 | ||
346 | b.SetBytes(int64(size * 16 * cores)) | |
347 | b.ResetTimer() | |
348 | ||
349 | var wg sync.WaitGroup | |
350 | ||
351 | for i := 0; i < b.N; i++ { | |
352 | wg.Add(cores) | |
353 | for c := 0; c < cores; c++ { | |
354 | go func(c int) { benchmarkAvx512SingleCore(h512[c*tests:(c+1)*tests], body); wg.Done() }(c) | |
355 | } | |
356 | wg.Wait() | |
357 | } | |
358 | } | |
359 | ||
360 | func BenchmarkAvx512_5M_2Cores(b *testing.B) { benchmarkAvx512MultiCore(b, 5*1024*1024, 2) } | |
361 | func BenchmarkAvx512_5M_4Cores(b *testing.B) { benchmarkAvx512MultiCore(b, 5*1024*1024, 4) } | |
362 | func BenchmarkAvx512_5M_6Cores(b *testing.B) { benchmarkAvx512MultiCore(b, 5*1024*1024, 6) } | |
363 | ||
364 | type maskTest struct { | |
365 | in [16]int | |
366 | out [16]maskRounds | |
367 | } | |
368 | ||
369 | var goldenMask = []maskTest{ | |
370 | {[16]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [16]maskRounds{}}, | |
371 | {[16]int{64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0}, [16]maskRounds{{0x5555, 1}}}, | |
372 | {[16]int{0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64}, [16]maskRounds{{0xaaaa, 1}}}, | |
373 | {[16]int{64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64}, [16]maskRounds{{0xffff, 1}}}, | |
374 | {[16]int{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}, [16]maskRounds{{0xffff, 2}}}, | |
375 | {[16]int{64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128}, [16]maskRounds{{0xffff, 1}, {0xaaaa, 1}}}, | |
376 | {[16]int{128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64, 128, 64}, [16]maskRounds{{0xffff, 1}, {0x5555, 1}}}, | |
377 | {[16]int{64, 192, 64, 192, 64, 192, 64, 192, 64, 192, 64, 192, 64, 192, 64, 192}, [16]maskRounds{{0xffff, 1}, {0xaaaa, 2}}}, | |
378 | // | |
379 | // >= 64 0110=6 1011=b 1101=d 0110=6 | |
380 | // >=128 0100=4 0010=2 1001=9 0100=4 | |
381 | {[16]int{0, 64, 128, 0, 64, 128, 0, 64, 128, 0, 64, 128, 0, 64, 128, 0}, [16]maskRounds{{0x6db6, 1}, {0x4924, 1}}}, | |
382 | {[16]int{1 * 64, 2 * 64, 3 * 64, 4 * 64, 5 * 64, 6 * 64, 7 * 64, 8 * 64, 9 * 64, 10 * 64, 11 * 64, 12 * 64, 13 * 64, 14 * 64, 15 * 64, 16 * 64}, | |
383 | [16]maskRounds{{0xffff, 1}, {0xfffe, 1}, {0xfffc, 1}, {0xfff8, 1}, {0xfff0, 1}, {0xffe0, 1}, {0xffc0, 1}, {0xff80, 1}, | |
384 | {0xff00, 1}, {0xfe00, 1}, {0xfc00, 1}, {0xf800, 1}, {0xf000, 1}, {0xe000, 1}, {0xc000, 1}, {0x8000, 1}}}, | |
385 | {[16]int{2 * 64, 1 * 64, 3 * 64, 4 * 64, 5 * 64, 6 * 64, 7 * 64, 8 * 64, 9 * 64, 10 * 64, 11 * 64, 12 * 64, 13 * 64, 14 * 64, 15 * 64, 16 * 64}, | |
386 | [16]maskRounds{{0xffff, 1}, {0xfffd, 1}, {0xfffc, 1}, {0xfff8, 1}, {0xfff0, 1}, {0xffe0, 1}, {0xffc0, 1}, {0xff80, 1}, | |
387 | {0xff00, 1}, {0xfe00, 1}, {0xfc00, 1}, {0xf800, 1}, {0xf000, 1}, {0xe000, 1}, {0xc000, 1}, {0x8000, 1}}}, | |
388 | {[16]int{10 * 64, 20 * 64, 30 * 64, 40 * 64, 50 * 64, 60 * 64, 70 * 64, 80 * 64, 90 * 64, 100 * 64, 110 * 64, 120 * 64, 130 * 64, 140 * 64, 150 * 64, 160 * 64}, | |
389 | [16]maskRounds{{0xffff, 10}, {0xfffe, 10}, {0xfffc, 10}, {0xfff8, 10}, {0xfff0, 10}, {0xffe0, 10}, {0xffc0, 10}, {0xff80, 10}, | |
390 | {0xff00, 10}, {0xfe00, 10}, {0xfc00, 10}, {0xf800, 10}, {0xf000, 10}, {0xe000, 10}, {0xc000, 10}, {0x8000, 10}}}, | |
391 | {[16]int{10 * 64, 19 * 64, 27 * 64, 34 * 64, 40 * 64, 45 * 64, 49 * 64, 52 * 64, 54 * 64, 55 * 64, 57 * 64, 60 * 64, 64 * 64, 69 * 64, 75 * 64, 82 * 64}, | |
392 | [16]maskRounds{{0xffff, 10}, {0xfffe, 9}, {0xfffc, 8}, {0xfff8, 7}, {0xfff0, 6}, {0xffe0, 5}, {0xffc0, 4}, {0xff80, 3}, | |
393 | {0xff00, 2}, {0xfe00, 1}, {0xfc00, 2}, {0xf800, 3}, {0xf000, 4}, {0xe000, 5}, {0xc000, 6}, {0x8000, 7}}}, | |
394 | } | |
395 | ||
396 | func TestMaskGen(t *testing.T) { | |
397 | input := [16][]byte{} | |
398 | for gcase, g := range goldenMask { | |
399 | for i, l := range g.in { | |
400 | buf := make([]byte, l) | |
401 | input[i] = buf[:] | |
402 | } | |
403 | ||
404 | mr := genMask(input) | |
405 | ||
406 | if !reflect.DeepEqual(mr, g.out) { | |
407 | t.Fatalf("case %d: got %04x\n want %04x", gcase, mr, g.out) | |
408 | } | |
409 | } | |
410 | } |