Codebase list golang-github-minio-sha256-simd / e529fa1
Support SHA* intrinsics on Intel CPU (#37) * Support SHA* intrinsics on Intel CPU - optimise: select block function at init - added dedicated padding function, optimised endian conversion - add assembly for Intel SHA extensions - update benchmarks - streamline checksum function - cleanup of sha assembly code * Cleanup code to be idiomatic Go Harshavardhana authored 5 years ago Frank Wessels committed 5 years ago
22 changed file(s) with 2845 addition(s) and 2257 deletion(s). Raw diff Collapse all Expand all
0 *.test
66
77 go:
88 - tip
9 - 1.11
10 - 1.10
9 - 1.11.x
1110
1211 env:
1312 - ARCH=x86_64
1515 package sha256
1616
1717 // True when SIMD instructions are available.
18 var avx512 = haveAVX512()
19 var avx2 = haveAVX2()
20 var avx = haveAVX()
21 var ssse3 = haveSSSE3()
18 var avx512 bool
19 var avx2 bool
20 var avx bool
21 var sse bool
22 var sse2 bool
23 var sse3 bool
24 var ssse3 bool
25 var sse41 bool
26 var sse42 bool
27 var popcnt bool
28 var sha bool
2229 var armSha = haveArmSha()
2330
24 // haveAVX returns true when there is AVX support
25 func haveAVX() bool {
26 _, _, c, _ := cpuid(1)
31 func init() {
32 var _xsave bool
33 var _osxsave bool
34 var _avx bool
35 var _avx2 bool
36 var _avx512f bool
37 var _avx512dq bool
38 // var _avx512pf bool
39 // var _avx512er bool
40 // var _avx512cd bool
41 var _avx512bw bool
42 var _avx512vl bool
43 var _sseState bool
44 var _avxState bool
45 var _opmaskState bool
46 var _zmmHI256State bool
47 var _hi16ZmmState bool
2748
28 // Check XGETBV, OXSAVE and AVX bits
29 if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
30 // Check for OS support
31 eax, _ := xgetbv(0)
32 return (eax & 0x6) == 0x6
33 }
34 return false
35 }
36
37 // haveAVX2 returns true when there is AVX2 support
38 func haveAVX2() bool {
3949 mfi, _, _, _ := cpuid(0)
4050
41 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
42 if mfi >= 7 && haveAVX() {
43 _, ebx, _, _ := cpuidex(7, 0)
44 return (ebx & 0x00000020) != 0
51 if mfi >= 1 {
52 _, _, c, d := cpuid(1)
53
54 sse = (d & (1 << 25)) != 0
55 sse2 = (d & (1 << 26)) != 0
56 sse3 = (c & (1 << 0)) != 0
57 ssse3 = (c & (1 << 9)) != 0
58 sse41 = (c & (1 << 19)) != 0
59 sse42 = (c & (1 << 20)) != 0
60 popcnt = (c & (1 << 23)) != 0
61 _xsave = (c & (1 << 26)) != 0
62 _osxsave = (c & (1 << 27)) != 0
63 _avx = (c & (1 << 28)) != 0
4564 }
46 return false
65
66 if mfi >= 7 {
67 _, b, _, _ := cpuid(7)
68
69 _avx2 = (b & (1 << 5)) != 0
70 _avx512f = (b & (1 << 16)) != 0
71 _avx512dq = (b & (1 << 17)) != 0
72 // _avx512pf = (b & (1 << 26)) != 0
73 // _avx512er = (b & (1 << 27)) != 0
74 // _avx512cd = (b & (1 << 28)) != 0
75 _avx512bw = (b & (1 << 30)) != 0
76 _avx512vl = (b & (1 << 31)) != 0
77 sha = (b & (1 << 29)) != 0
78 }
79
80 // Stop here if XSAVE unsupported or not enabled
81 if !_xsave || !_osxsave {
82 return
83 }
84
85 if _xsave && _osxsave {
86 a, _ := xgetbv(0)
87
88 _sseState = (a & (1 << 1)) != 0
89 _avxState = (a & (1 << 2)) != 0
90 _opmaskState = (a & (1 << 5)) != 0
91 _zmmHI256State = (a & (1 << 6)) != 0
92 _hi16ZmmState = (a & (1 << 7)) != 0
93 } else {
94 _sseState = true
95 }
96
97 // Very unlikely that OS would enable XSAVE and then disable SSE
98 if !_sseState {
99 sse = false
100 sse2 = false
101 sse3 = false
102 ssse3 = false
103 sse41 = false
104 sse42 = false
105 }
106
107 if _avxState {
108 avx = _avx
109 avx2 = _avx2
110 }
111
112 if _opmaskState && _zmmHI256State && _hi16ZmmState {
113 avx512 = (_avx512f &&
114 _avx512dq &&
115 _avx512bw &&
116 _avx512vl)
117 }
47118 }
48
49 // haveAVX512 returns true when there is AVX512 support
50 func haveAVX512() bool {
51 mfi, _, _, _ := cpuid(0)
52
53 // AVX-512 feature bits are read from CPUID leaf 7, so the maximum function index must be at least 7.
54 if mfi >= 7 {
55 _, _, c, _ := cpuid(1)
56
57 // Only detect AVX-512 features if XGETBV is supported
58 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
59 // Check for OS support
60 eax, _ := xgetbv(0)
61 _, ebx, _, _ := cpuidex(7, 0)
62
63 // Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256 bits of ZMM0-ZMM15, and
64 // ZMM16-ZMM31 state are enabled by the OS)
65 // and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by the OS).
66 if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
67 if ebx&(1<<16) == 0 {
68 return false // no AVX512F
69 }
70 if ebx&(1<<17) == 0 {
71 return false // no AVX512DQ
72 }
73 if ebx&(1<<30) == 0 {
74 return false // no AVX512BW
75 }
76 if ebx&(1<<31) == 0 {
77 return false // no AVX512VL
78 }
79 return true
80 }
81 }
82 }
83 return false
84 }
85
86 // haveSSSE3 returns true when there is SSSE3 support
87 func haveSSSE3() bool {
88
89 _, _, c, _ := cpuid(1)
90
91 return (c & 0x00000200) != 0
92 }
2323
2424 // func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
2525 TEXT ·cpuid(SB), 7, $0
26 XORL CX, CX
27 MOVL op+0(FP), AX
28 CPUID
29 MOVL AX, eax+4(FP)
30 MOVL BX, ebx+8(FP)
31 MOVL CX, ecx+12(FP)
32 MOVL DX, edx+16(FP)
33 RET
26 XORL CX, CX
27 MOVL op+0(FP), AX
28 CPUID
29 MOVL AX, eax+4(FP)
30 MOVL BX, ebx+8(FP)
31 MOVL CX, ecx+12(FP)
32 MOVL DX, edx+16(FP)
33 RET
3434
3535 // func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
3636 TEXT ·cpuidex(SB), 7, $0
37 MOVL op+0(FP), AX
38 MOVL op2+4(FP), CX
39 CPUID
40 MOVL AX, eax+8(FP)
41 MOVL BX, ebx+12(FP)
42 MOVL CX, ecx+16(FP)
43 MOVL DX, edx+20(FP)
44 RET
37 MOVL op+0(FP), AX
38 MOVL op2+4(FP), CX
39 CPUID
40 MOVL AX, eax+8(FP)
41 MOVL BX, ebx+12(FP)
42 MOVL CX, ecx+16(FP)
43 MOVL DX, edx+20(FP)
44 RET
4545
4646 // func xgetbv(index uint32) (eax, edx uint32)
4747 TEXT ·xgetbv(SB), 7, $0
48 MOVL index+0(FP), CX
49 BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
50 MOVL AX, eax+4(FP)
51 MOVL DX, edx+8(FP)
52 RET
48 MOVL index+0(FP), CX
49 BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
50 MOVL AX, eax+4(FP)
51 MOVL DX, edx+8(FP)
52 RET
2323
2424 // func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
2525 TEXT ·cpuid(SB), 7, $0
26 XORQ CX, CX
27 MOVL op+0(FP), AX
28 CPUID
29 MOVL AX, eax+8(FP)
30 MOVL BX, ebx+12(FP)
31 MOVL CX, ecx+16(FP)
32 MOVL DX, edx+20(FP)
33 RET
34
26 XORQ CX, CX
27 MOVL op+0(FP), AX
28 CPUID
29 MOVL AX, eax+8(FP)
30 MOVL BX, ebx+12(FP)
31 MOVL CX, ecx+16(FP)
32 MOVL DX, edx+20(FP)
33 RET
3534
3635 // func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
3736 TEXT ·cpuidex(SB), 7, $0
38 MOVL op+0(FP), AX
39 MOVL op2+4(FP), CX
40 CPUID
41 MOVL AX, eax+8(FP)
42 MOVL BX, ebx+12(FP)
43 MOVL CX, ecx+16(FP)
44 MOVL DX, edx+20(FP)
45 RET
37 MOVL op+0(FP), AX
38 MOVL op2+4(FP), CX
39 CPUID
40 MOVL AX, eax+8(FP)
41 MOVL BX, ebx+12(FP)
42 MOVL CX, ecx+16(FP)
43 MOVL DX, edx+20(FP)
44 RET
4645
4746 // func xgetbv(index uint32) (eax, edx uint32)
4847 TEXT ·xgetbv(SB), 7, $0
49 MOVL index+0(FP), CX
50 BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
51 MOVL AX, eax+8(FP)
52 MOVL DX, edx+12(FP)
53 RET
48 MOVL index+0(FP), CX
49 BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
50 MOVL AX, eax+8(FP)
51 MOVL DX, edx+12(FP)
52 RET
1717
1818 import (
1919 "crypto/sha256"
20 "encoding/binary"
2021 "hash"
2122 "runtime"
2223 )
2829 const BlockSize = 64
2930
3031 const (
31 chunk = 64
32 chunk = BlockSize
3233 init0 = 0x6A09E667
3334 init1 = 0xBB67AE85
3435 init2 = 0x3C6EF372
6162 d.len = 0
6263 }
6364
// blockfuncType identifies one of the block implementations that block can
// dispatch to.
type blockfuncType int

// Identifiers for the block implementations. Values are assigned sequentially
// from zero, so the zero value of blockfuncType is blockfuncGeneric.
const (
	blockfuncGeneric blockfuncType = iota
	blockfuncAvx512
	blockfuncAvx2
	blockfuncAvx
	blockfuncSsse
	blockfuncSha
	blockfuncArm
)

// blockfunc is the implementation that block currently dispatches to.
var blockfunc blockfuncType
78
6479 func block(dig *digest, p []byte) {
80 if blockfunc == blockfuncSha {
81 blockShaGo(dig, p)
82 } else if blockfunc == blockfuncAvx2 {
83 blockAvx2Go(dig, p)
84 } else if blockfunc == blockfuncAvx {
85 blockAvxGo(dig, p)
86 } else if blockfunc == blockfuncSsse {
87 blockSsseGo(dig, p)
88 } else if blockfunc == blockfuncArm {
89 blockArmGo(dig, p)
90 } else if blockfunc == blockfuncGeneric {
91 blockGeneric(dig, p)
92 }
93 }
94
95 func init() {
6596 is386bit := runtime.GOARCH == "386"
6697 isARM := runtime.GOARCH == "arm"
67 if is386bit || isARM {
68 blockGeneric(dig, p)
69 }
70 switch !is386bit && !isARM {
98 switch {
99 case is386bit || isARM:
100 blockfunc = blockfuncGeneric
101 case sha && ssse3 && sse41:
102 blockfunc = blockfuncSha
71103 case avx2:
72 blockAvx2Go(dig, p)
104 blockfunc = blockfuncAvx2
73105 case avx:
74 blockAvxGo(dig, p)
106 blockfunc = blockfuncAvx
75107 case ssse3:
76 blockSsseGo(dig, p)
108 blockfunc = blockfuncSsse
77109 case armSha:
78 blockArmGo(dig, p)
110 blockfunc = blockfuncArm
79111 default:
80 blockGeneric(dig, p)
112 blockfunc = blockfuncGeneric
81113 }
82114 }
83115
84116 // New returns a new hash.Hash computing the SHA256 checksum.
85117 func New() hash.Hash {
86 if avx2 || avx || ssse3 || armSha {
118 if blockfunc != blockfuncGeneric {
87119 d := new(digest)
88120 d.Reset()
89121 return d
94126 }
95127
96128 // Sum256 - single caller sha256 helper
97 func Sum256(data []byte) [Size]byte {
129 func Sum256(data []byte) (result [Size]byte) {
98130 var d digest
99131 d.Reset()
100132 d.Write(data)
101 return d.checkSum()
133 result = d.checkSum()
134 return
102135 }
103136
104137 // Return size of checksum
140173 }
141174
142175 // Intermediate checksum function
143 func (d *digest) checkSum() [Size]byte {
144 len := d.len
145 // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64.
146 var tmp [64]byte
147 tmp[0] = 0x80
148 if len%64 < 56 {
149 d.Write(tmp[0 : 56-len%64])
150 } else {
151 d.Write(tmp[0 : 64+56-len%64])
152 }
153
154 // Length in bits.
155 len <<= 3
156 for i := uint(0); i < 8; i++ {
157 tmp[i] = byte(len >> (56 - 8*i))
158 }
159 d.Write(tmp[0:8])
160
161 if d.nx != 0 {
162 panic("d.nx != 0")
163 }
164
165 h := d.h[:]
166
167 var digest [Size]byte
168 for i, s := range h {
169 digest[i*4] = byte(s >> 24)
170 digest[i*4+1] = byte(s >> 16)
171 digest[i*4+2] = byte(s >> 8)
172 digest[i*4+3] = byte(s)
173 }
174
175 return digest
176 }
176 func (d *digest) checkSum() (digest [Size]byte) {
177 n := d.nx
178
179 var k [64]byte
180 copy(k[:], d.x[:n])
181
182 k[n] = 0x80
183
184 if n >= 56 {
185 block(d, k[:])
186
187 // clear block buffer - go compiles this to optimal 1x xorps + 4x movups
188 // unfortunately expressing this more succinctly results in much worse code
189 k[0] = 0
190 k[1] = 0
191 k[2] = 0
192 k[3] = 0
193 k[4] = 0
194 k[5] = 0
195 k[6] = 0
196 k[7] = 0
197 k[8] = 0
198 k[9] = 0
199 k[10] = 0
200 k[11] = 0
201 k[12] = 0
202 k[13] = 0
203 k[14] = 0
204 k[15] = 0
205 k[16] = 0
206 k[17] = 0
207 k[18] = 0
208 k[19] = 0
209 k[20] = 0
210 k[21] = 0
211 k[22] = 0
212 k[23] = 0
213 k[24] = 0
214 k[25] = 0
215 k[26] = 0
216 k[27] = 0
217 k[28] = 0
218 k[29] = 0
219 k[30] = 0
220 k[31] = 0
221 k[32] = 0
222 k[33] = 0
223 k[34] = 0
224 k[35] = 0
225 k[36] = 0
226 k[37] = 0
227 k[38] = 0
228 k[39] = 0
229 k[40] = 0
230 k[41] = 0
231 k[42] = 0
232 k[43] = 0
233 k[44] = 0
234 k[45] = 0
235 k[46] = 0
236 k[47] = 0
237 k[48] = 0
238 k[49] = 0
239 k[50] = 0
240 k[51] = 0
241 k[52] = 0
242 k[53] = 0
243 k[54] = 0
244 k[55] = 0
245 k[56] = 0
246 k[57] = 0
247 k[58] = 0
248 k[59] = 0
249 k[60] = 0
250 k[61] = 0
251 k[62] = 0
252 k[63] = 0
253 }
254 binary.BigEndian.PutUint64(k[56:64], uint64(d.len)<<3)
255 block(d, k[:])
256
257 {
258 const i = 0
259 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
260 }
261 {
262 const i = 1
263 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
264 }
265 {
266 const i = 2
267 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
268 }
269 {
270 const i = 3
271 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
272 }
273 {
274 const i = 4
275 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
276 }
277 {
278 const i = 5
279 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
280 }
281 {
282 const i = 6
283 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
284 }
285 {
286 const i = 7
287 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
288 }
289
290 return
291 }
22072207 }
22082208
22092209 func TestGolden(t *testing.T) {
2210 blockfuncSaved := blockfunc
2211
2212 if sha && ssse3 && sse41 {
2213 blockfunc = blockfuncSha
2214 for _, g := range golden {
2215 s := fmt.Sprintf("%x", Sum256([]byte(g.in)))
2216 if Sum256([]byte(g.in)) != g.out {
2217 t.Fatalf("SHA: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:]))
2218 }
2219 }
2220 }
22102221 if avx2 {
2222 blockfunc = blockfuncAvx2
22112223 for _, g := range golden {
22122224 s := fmt.Sprintf("%x", Sum256([]byte(g.in)))
22132225 if Sum256([]byte(g.in)) != g.out {
22142226 t.Fatalf("AVX2: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:]))
22152227 }
22162228 }
2217 avx2 = false
22182229 }
22192230 if avx {
2231 blockfunc = blockfuncAvx
22202232 for _, g := range golden {
22212233 s := fmt.Sprintf("%x", Sum256([]byte(g.in)))
22222234 if Sum256([]byte(g.in)) != g.out {
22232235 t.Fatalf("AVX: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:]))
22242236 }
22252237 }
2226 avx = false
22272238 }
22282239 if ssse3 {
2240 blockfunc = blockfuncSsse
22292241 for _, g := range golden {
22302242 s := fmt.Sprintf("%x", Sum256([]byte(g.in)))
22312243 if Sum256([]byte(g.in)) != g.out {
22332245 }
22342246 }
22352247 }
2248 if true {
2249 blockfunc = blockfuncGeneric
2250 for _, g := range golden {
2251 s := fmt.Sprintf("%x", Sum256([]byte(g.in)))
2252 if Sum256([]byte(g.in)) != g.out {
2253 t.Fatalf("Generic: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:]))
2254 }
2255 }
2256 }
2257
2258 blockfunc = blockfuncSaved
22362259 }
22372260
22382261 func TestSize(t *testing.T) {
22542277 var buf = make([]byte, size)
22552278 b.SetBytes(int64(size))
22562279 sum := make([]byte, bench.Size())
2280 b.ResetTimer()
22572281 for i := 0; i < b.N; i++ {
22582282 bench.Reset()
22592283 bench.Write(buf[:size])
22612285 }
22622286 }
22632287
2264 func BenchmarkHash8Bytes(b *testing.B) { benchmarkSize(b, 8) }
2265 func BenchmarkHash1K(b *testing.B) { benchmarkSize(b, 1024) }
2266 func BenchmarkHash8K(b *testing.B) { benchmarkSize(b, 8192) }
2267 func BenchmarkHash1MAvx2(b *testing.B) { benchmarkSize(b, 1024*1024) }
2268 func BenchmarkHash5MAvx2(b *testing.B) { benchmarkSize(b, 5*1024*1024) }
2269 func BenchmarkHash10MAvx2(b *testing.B) { benchmarkSize(b, 10*1024*1024) }
2288 func BenchmarkHash(b *testing.B) {
2289 algos := []struct {
2290 n string
2291 t blockfuncType
2292 f bool
2293 }{
2294 {"SHA_", blockfuncSha, sha && sse41 && ssse3},
2295 {"AVX2", blockfuncAvx2, avx2},
2296 {"AVX_", blockfuncAvx, avx},
2297 {"SSSE", blockfuncSsse, ssse3},
2298 {"GEN_", blockfuncGeneric, true},
2299 }
2300
2301 sizes := []struct {
2302 n string
2303 f func(*testing.B, int)
2304 s int
2305 }{
2306 {"8Bytes", benchmarkSize, 1 << 3},
2307 {"1K", benchmarkSize, 1 << 10},
2308 {"8K", benchmarkSize, 1 << 13},
2309 {"1M", benchmarkSize, 1 << 20},
2310 {"5M", benchmarkSize, 5 << 20},
2311 {"10M", benchmarkSize, 5 << 21},
2312 }
2313
2314 for _, a := range algos {
2315 if a.f {
2316 blockfuncSaved := blockfunc
2317 blockfunc = a.t
2318 for _, y := range sizes {
2319 s := a.n + "/" + y.n
2320 b.Run(s, func(b *testing.B) { y.f(b, y.s) })
2321 }
2322 blockfunc = blockfuncSaved
2323 }
2324 }
2325 }
116116 // func blockAvx2(h []uint32, message []uint8)
117117 TEXT ·blockAvx2(SB), 7, $0
118118
119 MOVQ ctx+0(FP), DI // DI: &h
120 MOVQ inp+24(FP), SI // SI: &message
121 MOVQ inplength+32(FP), DX // len(message)
122 ADDQ SI, DX // end pointer of input
123 MOVQ SP, R11 // copy stack pointer
124 SUBQ $0x220, SP // sp -= 0x220
125 ANDQ $0xfffffffffffffc00, SP // align stack frame
126 ADDQ $0x1c0, SP
127 MOVQ DI, 0x40(SP) // save ctx
128 MOVQ SI, 0x48(SP) // save input
129 MOVQ DX, 0x50(SP) // save end pointer
130 MOVQ R11, 0x58(SP) // save copy of stack pointer
131
132 WORD $0xf8c5; BYTE $0x77 // vzeroupper
133 ADDQ $0x40, SI // input++
134 MOVL (DI), AX
135 MOVQ SI, R12 // borrow $T1
136 MOVL 4(DI), BX
137 CMPQ SI, DX // $_end
138 MOVL 8(DI), CX
139 LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */
140 MOVL 12(DI), DX
141 MOVL 16(DI), R8
142 MOVL 20(DI), R9
143 MOVL 24(DI), R10
144 MOVL 28(DI), R11
145
146 LEAQ K256<>(SB), BP
147 LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rbp+0x220] */
148 LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rbp+0x240] */
149 LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm10,YMMWORD PTR [rbp+0x200] */
119 MOVQ ctx+0(FP), DI // DI: &h
120 MOVQ inp+24(FP), SI // SI: &message
121 MOVQ inplength+32(FP), DX // len(message)
122 ADDQ SI, DX // end pointer of input
123 MOVQ SP, R11 // copy stack pointer
124 SUBQ $0x220, SP // sp -= 0x220
125 ANDQ $0xfffffffffffffc00, SP // align stack frame
126 ADDQ $0x1c0, SP
127 MOVQ DI, 0x40(SP) // save ctx
128 MOVQ SI, 0x48(SP) // save input
129 MOVQ DX, 0x50(SP) // save end pointer
130 MOVQ R11, 0x58(SP) // save copy of stack pointer
131
132 WORD $0xf8c5; BYTE $0x77 // vzeroupper
133 ADDQ $0x40, SI // input++
134 MOVL (DI), AX
135 MOVQ SI, R12 // borrow $T1
136 MOVL 4(DI), BX
137 CMPQ SI, DX // $_end
138 MOVL 8(DI), CX
139 LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */
140 MOVL 12(DI), DX
141 MOVL 16(DI), R8
142 MOVL 20(DI), R9
143 MOVL 24(DI), R10
144 MOVL 28(DI), R11
145
146 LEAQ K256<>(SB), BP
147 LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rbp+0x220] */
148 LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rbp+0x240] */
149 LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm10,YMMWORD PTR [rbp+0x200] */
150150
151151 loop0:
152 LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10
153
154 // Load first 16 dwords from two blocks
155 MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40]
156 MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30]
157 MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20]
158 MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10]
159
160 // Byte swap data and transpose data into high/low
161 LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1
162 LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1
163 LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7
164 LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1
165 LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7
166 LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1
167
168 LEAQ K256<>(SB), BP
169 LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7
170 LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp]
171 LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7
172 LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp]
173 LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp]
174 LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp]
175
176 LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4
177 XORQ R14, R14
178 LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5
179
180 ADDQ $-0x40, SP
181 MOVQ BX, DI
182 LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
183 XORQ CX, DI // magic
184 LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7
185 MOVQ R9, R12
186 ADDQ $0x80,BP
152 LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10
153
154 // Load first 16 dwords from two blocks
155 MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40]
156 MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30]
157 MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20]
158 MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10]
159
160 // Byte swap data and transpose data into high/low
161 LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1
162 LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1
163 LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7
164 LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1
165 LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7
166 LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1
167
168 LEAQ K256<>(SB), BP
169 LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7
170 LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp]
171 LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7
172 LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp]
173 LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp]
174 LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp]
175
176 LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4
177 XORQ R14, R14
178 LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5
179
180 ADDQ $-0x40, SP
181 MOVQ BX, DI
182 LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
183 XORQ CX, DI // magic
184 LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7
185 MOVQ R9, R12
186 ADDQ $0x80, BP
187187
188188 loop1:
189 // Schedule 48 input dwords, by doing 3 rounds of 12 each
190 // Note: SIMD instructions are interleaved with the SHA calculations
191 ADDQ $-0x40, SP
192 LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4
193
194 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80)
195 LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80]
196 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
197 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
198 LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4
199 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
200 LONG $0x30048d42 // lea eax,[rax+r14*1]
201 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
202 LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
203 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
204 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
205 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
206 LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7
207 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
208 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
209 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
210 LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
211 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
212 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
213 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
214 LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
215 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
216 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
217 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
218 LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
219 WORD $0x2144; BYTE $0xff // and edi,r15d
220 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
221 WORD $0xdf31 // xor edi,ebx
222 LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa
223 WORD $0x3145; BYTE $0xee // xor r14d,r13d
224 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
225 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
226 LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
227
228 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84)
229 LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84]
230 WORD $0x2141; BYTE $0xd4 // and r12d,edx
231 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
232 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
233 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
234 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
235 LONG $0x22148d47 // lea r10d,[r10+r12*1]
236 LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
237 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
238 WORD $0x3141; BYTE $0xfd // xor r13d,edi
239 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
240 LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
241 LONG $0x22148d47 // lea r10d,[r10+r12*1]
242 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
243 WORD $0x8944; BYTE $0xdf // mov edi,r11d
244 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
245 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
246 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
247 WORD $0xc731 // xor edi,eax
248 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
249 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
250 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
251 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
252 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
253 WORD $0x2141; BYTE $0xff // and r15d,edi
254 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
255 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
256 LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4
257 WORD $0x3145; BYTE $0xee // xor r14d,r13d
258 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
259 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
260 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
261
262 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88)
263 LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88]
264 WORD $0x2141; BYTE $0xcc // and r12d,ecx
265 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
266 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
267 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
268 LONG $0x32148d47 // lea r10d,[r10+r14*1]
269 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
270 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
271 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
272 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
273 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
274 LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
275 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
276 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
277 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
278 LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6
279 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
280 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
281 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
282 LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50
283 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
284 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
285 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
286 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
287 WORD $0x2144; BYTE $0xff // and edi,r15d
288 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
289 WORD $0x3144; BYTE $0xdf // xor edi,r11d
290 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
291 WORD $0x3145; BYTE $0xee // xor r14d,r13d
292 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
293 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
294 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
295
296 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c)
297 LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c]
298 WORD $0x2141; BYTE $0xdc // and r12d,ebx
299 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
300 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
301 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
302 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
303 LONG $0x20048d47 // lea r8d,[r8+r12*1]
304 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
305 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
306 WORD $0x3141; BYTE $0xfd // xor r13d,edi
307 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
308 LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
309 LONG $0x20048d47 // lea r8d,[r8+r12*1]
310 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
311 WORD $0x8944; BYTE $0xcf // mov edi,r9d
312 LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6
313 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
314 LONG $0x28048d47 // lea r8d,[r8+r13*1]
315 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
316 LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0]
317 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
318 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
319 LONG $0x00048d42 // lea eax,[rax+r8*1]
320 WORD $0x2141; BYTE $0xff // and r15d,edi
321 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
322 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
323 WORD $0x3145; BYTE $0xee // xor r14d,r13d
324 LONG $0x38048d47 // lea r8d,[r8+r15*1]
325 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
326
327 LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
328 LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4
329
330 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0)
331 LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0]
332 WORD $0x2141; BYTE $0xc4 // and r12d,eax
333 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
334 LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4
335 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
336 LONG $0x30048d47 // lea r8d,[r8+r14*1]
337 LONG $0x22148d42 // lea edx,[rdx+r12*1]
338 LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
339 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
340 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
341 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
342 LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7
343 LONG $0x22148d42 // lea edx,[rdx+r12*1]
344 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
345 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
346 LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
347 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
348 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
349 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
350 LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
351 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
352 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
353 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
354 LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
355 WORD $0x2144; BYTE $0xff // and edi,r15d
356 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
357 WORD $0x3144; BYTE $0xcf // xor edi,r9d
358 LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa
359 WORD $0x3145; BYTE $0xee // xor r14d,r13d
360 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
361 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
362 LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
363
364 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4)
365 LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4]
366 WORD $0x2145; BYTE $0xdc // and r12d,r11d
367 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
368 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
369 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
370 LONG $0x32148d42 // lea edx,[rdx+r14*1]
371 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
372 LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
373 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
374 WORD $0x3141; BYTE $0xfd // xor r13d,edi
375 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
376 LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
377 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
378 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
379 WORD $0xd789 // mov edi,edx
380 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
381 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
382 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
383 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
384 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
385 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
386 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
387 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
388 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
389 WORD $0x2141; BYTE $0xff // and r15d,edi
390 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
391 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
392 LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4
393 WORD $0x3145; BYTE $0xee // xor r14d,r13d
394 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
395 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
396 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
397
398 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8)
399 LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8]
400 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
401 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
402 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
403 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
404 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
405 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
406 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
407 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
408 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
409 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
410 LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
411 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
412 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
413 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
414 LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6
415 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
416 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
417 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
418 LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50
419 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
420 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
421 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
422 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
423 WORD $0x2144; BYTE $0xff // and edi,r15d
424 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
425 WORD $0xd731 // xor edi,edx
426 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
427 WORD $0x3145; BYTE $0xee // xor r14d,r13d
428 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
429 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
430 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
431
432 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac)
433 LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac]
434 WORD $0x2145; BYTE $0xcc // and r12d,r9d
435 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
436 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
437 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
438 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
439 LONG $0x20048d42 // lea eax,[rax+r12*1]
440 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
441 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
442 WORD $0x3141; BYTE $0xfd // xor r13d,edi
443 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
444 LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
445 LONG $0x20048d42 // lea eax,[rax+r12*1]
446 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
447 WORD $0xdf89 // mov edi,ebx
448 LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6
449 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
450 LONG $0x28048d42 // lea eax,[rax+r13*1]
451 WORD $0xcf31 // xor edi,ecx
452 LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20]
453 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
454 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
455 LONG $0x00048d45 // lea r8d,[r8+rax*1]
456 WORD $0x2141; BYTE $0xff // and r15d,edi
457 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
458 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
459 WORD $0x3145; BYTE $0xee // xor r14d,r13d
460 LONG $0x38048d42 // lea eax,[rax+r15*1]
461 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
462
463 LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6
464
465 LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40]
466 LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4
467
468 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80)
469 LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80]
470 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
471 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
472 LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4
473 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
474 LONG $0x30048d42 // lea eax,[rax+r14*1]
475 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
476 LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
477 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
478 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
479 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
480 LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7
481 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
482 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
483 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
484 LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
485 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
486 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
487 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
488 LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
489 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
490 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
491 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
492 LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
493 WORD $0x2144; BYTE $0xff // and edi,r15d
494 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
495 WORD $0xdf31 // xor edi,ebx
496 LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa
497 WORD $0x3145; BYTE $0xee // xor r14d,r13d
498 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
499 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
500 LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
501
502 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84)
503 LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84]
504 WORD $0x2141; BYTE $0xd4 // and r12d,edx
505 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
506 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
507 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
508 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
509 LONG $0x22148d47 // lea r10d,[r10+r12*1]
510 LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
511 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
512 WORD $0x3141; BYTE $0xfd // xor r13d,edi
513 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
514 LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
515 LONG $0x22148d47 // lea r10d,[r10+r12*1]
516 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
517 WORD $0x8944; BYTE $0xdf // mov edi,r11d
518 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
519 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
520 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
521 WORD $0xc731 // xor edi,eax
522 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
523 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
524 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
525 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
526 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
527 WORD $0x2141; BYTE $0xff // and r15d,edi
528 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
529 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
530 LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4
531 WORD $0x3145; BYTE $0xee // xor r14d,r13d
532 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
533 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
534 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
535
536 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88)
537 LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88]
538 WORD $0x2141; BYTE $0xcc // and r12d,ecx
539 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
540 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
541 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
542 LONG $0x32148d47 // lea r10d,[r10+r14*1]
543 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
544 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
545 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
546 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
547 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
548 LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
549 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
550 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
551 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
552 LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6
553 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
554 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
555 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
556 LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50
557 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
558 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
559 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
560 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
561 WORD $0x2144; BYTE $0xff // and edi,r15d
562 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
563 WORD $0x3144; BYTE $0xdf // xor edi,r11d
564 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
565 WORD $0x3145; BYTE $0xee // xor r14d,r13d
566 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
567 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
568 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
569
570 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c)
571 LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c]
572 WORD $0x2141; BYTE $0xdc // and r12d,ebx
573 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
574 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
575 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
576 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
577 LONG $0x20048d47 // lea r8d,[r8+r12*1]
578 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
579 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
580 WORD $0x3141; BYTE $0xfd // xor r13d,edi
581 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
582 LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
583 LONG $0x20048d47 // lea r8d,[r8+r12*1]
584 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
585 WORD $0x8944; BYTE $0xcf // mov edi,r9d
586 LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6
587 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
588 LONG $0x28048d47 // lea r8d,[r8+r13*1]
589 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
590 LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40]
591 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
592 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
593 LONG $0x00048d42 // lea eax,[rax+r8*1]
594 WORD $0x2141; BYTE $0xff // and r15d,edi
595 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
596 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
597 WORD $0x3145; BYTE $0xee // xor r14d,r13d
598 LONG $0x38048d47 // lea r8d,[r8+r15*1]
599 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
600
601 LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
602 LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4
603
604 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0)
605 LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0]
606 WORD $0x2141; BYTE $0xc4 // and r12d,eax
607 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
608 LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4
609 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
610 LONG $0x30048d47 // lea r8d,[r8+r14*1]
611 LONG $0x22148d42 // lea edx,[rdx+r12*1]
612 LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
613 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
614 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
615 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
616 LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7
617 LONG $0x22148d42 // lea edx,[rdx+r12*1]
618 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
619 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
620 LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
621 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
622 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
623 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
624 LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
625 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
626 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
627 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
628 LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
629 WORD $0x2144; BYTE $0xff // and edi,r15d
630 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
631 WORD $0x3144; BYTE $0xcf // xor edi,r9d
632 LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa
633 WORD $0x3145; BYTE $0xee // xor r14d,r13d
634 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
635 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
636 LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
637
638 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4)
639 LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4]
640 WORD $0x2145; BYTE $0xdc // and r12d,r11d
641 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
642 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
643 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
644 LONG $0x32148d42 // lea edx,[rdx+r14*1]
645 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
646 LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
647 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
648 WORD $0x3141; BYTE $0xfd // xor r13d,edi
649 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
650 LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
651 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
652 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
653 WORD $0xd789 // mov edi,edx
654 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
655 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
656 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
657 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
658 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
659 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
660 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
661 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
662 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
663 WORD $0x2141; BYTE $0xff // and r15d,edi
664 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
665 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
666 LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4
667 WORD $0x3145; BYTE $0xee // xor r14d,r13d
668 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
669 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
670 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
671
672 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8)
673 LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8]
674 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
675 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
676 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
677 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
678 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
679 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
680 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
681 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
682 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
683 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
684 LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
685 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
686 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
687 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
688 LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6
689 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
690 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
691 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
692 LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50
693 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
694 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
695 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
696 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
697 WORD $0x2144; BYTE $0xff // and edi,r15d
698 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
699 WORD $0xd731 // xor edi,edx
700 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
701 WORD $0x3145; BYTE $0xee // xor r14d,r13d
702 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
703 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
704 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
705
706 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac)
707 LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac]
708 WORD $0x2145; BYTE $0xcc // and r12d,r9d
709 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
710 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
711 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
712 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
713 LONG $0x20048d42 // lea eax,[rax+r12*1]
714 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
715 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
716 WORD $0x3141; BYTE $0xfd // xor r13d,edi
717 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
718 LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
719 LONG $0x20048d42 // lea eax,[rax+r12*1]
720 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
721 WORD $0xdf89 // mov edi,ebx
722 LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6
723 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
724 LONG $0x28048d42 // lea eax,[rax+r13*1]
725 WORD $0xcf31 // xor edi,ecx
726 LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60]
727 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
728 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
729 LONG $0x00048d45 // lea r8d,[r8+rax*1]
730 WORD $0x2141; BYTE $0xff // and r15d,edi
731 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
732 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
733 WORD $0x3145; BYTE $0xee // xor r14d,r13d
734 LONG $0x38048d42 // lea eax,[rax+r15*1]
735 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
736
737 LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6
738 ADDQ $0x80, BP
739
740 CMPB 0x3(BP),$0x0
741 JNE loop1
742
743 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40)
744 LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40]
745 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
746 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
747 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
748 LONG $0x30048d42 // lea eax,[rax+r14*1]
749 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
750 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
751 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
752 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
753 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
754 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
755 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
756 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
757 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
758 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
759 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
760 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
761 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
762 WORD $0x2144; BYTE $0xff // and edi,r15d
763 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
764 WORD $0xdf31 // xor edi,ebx
765 WORD $0x3145; BYTE $0xee // xor r14d,r13d
766 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
767 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
768
769 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44)
770 LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44]
771 WORD $0x2141; BYTE $0xd4 // and r12d,edx
772 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
773 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
774 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
775 LONG $0x22148d47 // lea r10d,[r10+r12*1]
776 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
777 WORD $0x3141; BYTE $0xfd // xor r13d,edi
778 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
779 LONG $0x22148d47 // lea r10d,[r10+r12*1]
780 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
781 WORD $0x8944; BYTE $0xdf // mov edi,r11d
782 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
783 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
784 WORD $0xc731 // xor edi,eax
785 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
786 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
787 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
788 WORD $0x2141; BYTE $0xff // and r15d,edi
789 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
790 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
791 WORD $0x3145; BYTE $0xee // xor r14d,r13d
792 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
793 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
794
795 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48)
796 LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48]
797 WORD $0x2141; BYTE $0xcc // and r12d,ecx
798 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
799 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
800 LONG $0x32148d47 // lea r10d,[r10+r14*1]
801 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
802 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
803 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
804 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
805 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
806 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
807 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
808 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
809 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
810 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
811 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
812 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
813 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
814 WORD $0x2144; BYTE $0xff // and edi,r15d
815 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
816 WORD $0x3144; BYTE $0xdf // xor edi,r11d
817 WORD $0x3145; BYTE $0xee // xor r14d,r13d
818 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
819 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
820
821 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c)
822 LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c]
823 WORD $0x2141; BYTE $0xdc // and r12d,ebx
824 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
825 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
826 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
827 LONG $0x20048d47 // lea r8d,[r8+r12*1]
828 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
829 WORD $0x3141; BYTE $0xfd // xor r13d,edi
830 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
831 LONG $0x20048d47 // lea r8d,[r8+r12*1]
832 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
833 WORD $0x8944; BYTE $0xcf // mov edi,r9d
834 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
835 LONG $0x28048d47 // lea r8d,[r8+r13*1]
836 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
837 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
838 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
839 LONG $0x00048d42 // lea eax,[rax+r8*1]
840 WORD $0x2141; BYTE $0xff // and r15d,edi
841 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
842 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
843 WORD $0x3145; BYTE $0xee // xor r14d,r13d
844 LONG $0x38048d47 // lea r8d,[r8+r15*1]
845 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
846
847 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60)
848 LONG $0x60245403 // add edx,[rsp+0x60]
849 WORD $0x2141; BYTE $0xc4 // and r12d,eax
850 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
851 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
852 LONG $0x30048d47 // lea r8d,[r8+r14*1]
853 LONG $0x22148d42 // lea edx,[rdx+r12*1]
854 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
855 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
856 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
857 LONG $0x22148d42 // lea edx,[rdx+r12*1]
858 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
859 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
860 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
861 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
862 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
863 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
864 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
865 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
866 WORD $0x2144; BYTE $0xff // and edi,r15d
867 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
868 WORD $0x3144; BYTE $0xcf // xor edi,r9d
869 WORD $0x3145; BYTE $0xee // xor r14d,r13d
870 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
871 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
872
873 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64)
874 LONG $0x64244c03 // add ecx,[rsp+0x64]
875 WORD $0x2145; BYTE $0xdc // and r12d,r11d
876 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
877 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
878 LONG $0x32148d42 // lea edx,[rdx+r14*1]
879 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
880 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
881 WORD $0x3141; BYTE $0xfd // xor r13d,edi
882 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
883 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
884 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
885 WORD $0xd789 // mov edi,edx
886 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
887 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
888 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
889 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
890 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
891 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
892 WORD $0x2141; BYTE $0xff // and r15d,edi
893 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
894 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
895 WORD $0x3145; BYTE $0xee // xor r14d,r13d
896 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
897 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
898
899 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68)
900 LONG $0x68245c03 // add ebx,[rsp+0x68]
901 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
902 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
903 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
904 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
905 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
906 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
907 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
908 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
909 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
910 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
911 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
912 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
913 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
914 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
915 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
916 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
917 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
918 WORD $0x2144; BYTE $0xff // and edi,r15d
919 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
920 WORD $0xd731 // xor edi,edx
921 WORD $0x3145; BYTE $0xee // xor r14d,r13d
922 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
923 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
924
925 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c)
926 LONG $0x6c244403 // add eax,[rsp+0x6c]
927 WORD $0x2145; BYTE $0xcc // and r12d,r9d
928 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
929 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
930 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
931 LONG $0x20048d42 // lea eax,[rax+r12*1]
932 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
933 WORD $0x3141; BYTE $0xfd // xor r13d,edi
934 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
935 LONG $0x20048d42 // lea eax,[rax+r12*1]
936 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
937 WORD $0xdf89 // mov edi,ebx
938 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
939 LONG $0x28048d42 // lea eax,[rax+r13*1]
940 WORD $0xcf31 // xor edi,ecx
941 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
942 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
943 LONG $0x00048d45 // lea r8d,[r8+rax*1]
944 WORD $0x2141; BYTE $0xff // and r15d,edi
945 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
946 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
947 WORD $0x3145; BYTE $0xee // xor r14d,r13d
948 LONG $0x38048d42 // lea eax,[rax+r15*1]
949 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
950
951 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00)
952 LONG $0x241c0344 // add r11d,[rsp]
953 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
954 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
955 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
956 LONG $0x30048d42 // lea eax,[rax+r14*1]
957 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
958 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
959 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
960 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
961 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
962 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
963 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
964 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
965 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
966 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
967 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
968 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
969 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
970 WORD $0x2144; BYTE $0xff // and edi,r15d
971 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
972 WORD $0xdf31 // xor edi,ebx
973 WORD $0x3145; BYTE $0xee // xor r14d,r13d
974 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
975 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
976
977 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04)
978 LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4]
979 WORD $0x2141; BYTE $0xd4 // and r12d,edx
980 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
981 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
982 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
983 LONG $0x22148d47 // lea r10d,[r10+r12*1]
984 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
985 WORD $0x3141; BYTE $0xfd // xor r13d,edi
986 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
987 LONG $0x22148d47 // lea r10d,[r10+r12*1]
988 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
989 WORD $0x8944; BYTE $0xdf // mov edi,r11d
990 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
991 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
992 WORD $0xc731 // xor edi,eax
993 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
994 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
995 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
996 WORD $0x2141; BYTE $0xff // and r15d,edi
997 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
998 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
999 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1000 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
1001 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
1002
1003 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08)
1004 LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8]
1005 WORD $0x2141; BYTE $0xcc // and r12d,ecx
1006 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
1007 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
1008 LONG $0x32148d47 // lea r10d,[r10+r14*1]
1009 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
1010 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
1011 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1012 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
1013 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
1014 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1015 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
1016 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
1017 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
1018 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
1019 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
1020 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
1021 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
1022 WORD $0x2144; BYTE $0xff // and edi,r15d
1023 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1024 WORD $0x3144; BYTE $0xdf // xor edi,r11d
1025 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1026 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
1027 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
1028
1029 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c)
1030 LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc]
1031 WORD $0x2141; BYTE $0xdc // and r12d,ebx
1032 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
1033 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
1034 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
1035 LONG $0x20048d47 // lea r8d,[r8+r12*1]
1036 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
1037 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1038 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
1039 LONG $0x20048d47 // lea r8d,[r8+r12*1]
1040 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1041 WORD $0x8944; BYTE $0xcf // mov edi,r9d
1042 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
1043 LONG $0x28048d47 // lea r8d,[r8+r13*1]
1044 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
1045 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
1046 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
1047 LONG $0x00048d42 // lea eax,[rax+r8*1]
1048 WORD $0x2141; BYTE $0xff // and r15d,edi
1049 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1050 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
1051 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1052 LONG $0x38048d47 // lea r8d,[r8+r15*1]
1053 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
1054
1055 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20)
1056 LONG $0x20245403 // add edx,[rsp+0x20]
1057 WORD $0x2141; BYTE $0xc4 // and r12d,eax
1058 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
1059 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
1060 LONG $0x30048d47 // lea r8d,[r8+r14*1]
1061 LONG $0x22148d42 // lea edx,[rdx+r12*1]
1062 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
1063 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1064 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
1065 LONG $0x22148d42 // lea edx,[rdx+r12*1]
1066 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1067 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
1068 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
1069 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
1070 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
1071 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
1072 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
1073 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
1074 WORD $0x2144; BYTE $0xff // and edi,r15d
1075 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1076 WORD $0x3144; BYTE $0xcf // xor edi,r9d
1077 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1078 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
1079 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
1080
1081 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24)
1082 LONG $0x24244c03 // add ecx,[rsp+0x24]
1083 WORD $0x2145; BYTE $0xdc // and r12d,r11d
1084 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
1085 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
1086 LONG $0x32148d42 // lea edx,[rdx+r14*1]
1087 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
1088 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
1089 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1090 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
1091 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
1092 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1093 WORD $0xd789 // mov edi,edx
1094 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
1095 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
1096 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
1097 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
1098 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
1099 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
1100 WORD $0x2141; BYTE $0xff // and r15d,edi
1101 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1102 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
1103 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1104 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
1105 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
1106
1107 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28)
1108 LONG $0x28245c03 // add ebx,[rsp+0x28]
1109 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
1110 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
1111 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
1112 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
1113 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
1114 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
1115 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1116 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
1117 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
1118 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1119 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
1120 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
1121 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
1122 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
1123 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
1124 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
1125 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
1126 WORD $0x2144; BYTE $0xff // and edi,r15d
1127 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1128 WORD $0xd731 // xor edi,edx
1129 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1130 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
1131 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
1132
1133 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c)
1134 LONG $0x2c244403 // add eax,[rsp+0x2c]
1135 WORD $0x2145; BYTE $0xcc // and r12d,r9d
1136 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
1137 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
1138 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
1139 LONG $0x20048d42 // lea eax,[rax+r12*1]
1140 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
1141 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1142 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
1143 LONG $0x20048d42 // lea eax,[rax+r12*1]
1144 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1145 WORD $0xdf89 // mov edi,ebx
1146 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
1147 LONG $0x28048d42 // lea eax,[rax+r13*1]
1148 WORD $0xcf31 // xor edi,ecx
1149 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
1150 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
1151 LONG $0x00048d45 // lea r8d,[r8+rax*1]
1152 WORD $0x2141; BYTE $0xff // and r15d,edi
1153 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1154 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
1155 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1156 LONG $0x38048d42 // lea eax,[rax+r15*1]
1157 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
1158
1159 MOVQ 0x200(SP), DI // $_ctx
1160 ADDQ R14, AX
1161
1162 LEAQ 0x1c0(SP), BP
1163
1164 ADDL (DI), AX
1165 ADDL 4(DI), BX
1166 ADDL 8(DI), CX
1167 ADDL 12(DI), DX
1168 ADDL 16(DI), R8
1169 ADDL 20(DI), R9
1170 ADDL 24(DI), R10
1171 ADDL 28(DI), R11
1172
1173 MOVL AX, (DI)
1174 MOVL BX, 4(DI)
1175 MOVL CX, 8(DI)
1176 MOVL DX, 12(DI)
1177 MOVL R8, 16(DI)
1178 MOVL R9, 20(DI)
1179 MOVL R10, 24(DI)
1180 MOVL R11, 28(DI)
1181
1182 CMPQ SI, 0x50(BP) // $_end
1183 JE done
1184
1185 XORQ R14, R14
1186 MOVQ BX, DI
1187 XORQ CX, DI // magic
1188 MOVQ R9, R12
189 // Schedule 48 input dwords, by doing 3 rounds of 12 each
190 // Note: SIMD instructions are interleaved with the SHA calculations
191 ADDQ $-0x40, SP
192 LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4
193
194 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80)
195 LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80]
196 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
197 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
198 LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4
199 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
200 LONG $0x30048d42 // lea eax,[rax+r14*1]
201 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
202 LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
203 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
204 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
205 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
206 LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7
207 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
208 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
209 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
210 LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
211 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
212 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
213 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
214 LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
215 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
216 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
217 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
218 LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
219 WORD $0x2144; BYTE $0xff // and edi,r15d
220 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
221 WORD $0xdf31 // xor edi,ebx
222 LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa
223 WORD $0x3145; BYTE $0xee // xor r14d,r13d
224 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
225 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
226 LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
227
228 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84)
229 LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84]
230 WORD $0x2141; BYTE $0xd4 // and r12d,edx
231 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
232 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
233 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
234 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
235 LONG $0x22148d47 // lea r10d,[r10+r12*1]
236 LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
237 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
238 WORD $0x3141; BYTE $0xfd // xor r13d,edi
239 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
240 LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
241 LONG $0x22148d47 // lea r10d,[r10+r12*1]
242 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
243 WORD $0x8944; BYTE $0xdf // mov edi,r11d
244 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
245 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
246 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
247 WORD $0xc731 // xor edi,eax
248 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
249 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
250 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
251 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
252 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
253 WORD $0x2141; BYTE $0xff // and r15d,edi
254 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
255 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
256 LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4
257 WORD $0x3145; BYTE $0xee // xor r14d,r13d
258 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
259 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
260 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
261
262 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88)
263 LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88]
264 WORD $0x2141; BYTE $0xcc // and r12d,ecx
265 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
266 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
267 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
268 LONG $0x32148d47 // lea r10d,[r10+r14*1]
269 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
270 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
271 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
272 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
273 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
274 LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
275 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
276 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
277 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
278 LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6
279 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
280 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
281 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
282 LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50
283 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
284 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
285 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
286 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
287 WORD $0x2144; BYTE $0xff // and edi,r15d
288 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
289 WORD $0x3144; BYTE $0xdf // xor edi,r11d
290 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
291 WORD $0x3145; BYTE $0xee // xor r14d,r13d
292 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
293 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
294 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
295
296 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c)
297 LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c]
298 WORD $0x2141; BYTE $0xdc // and r12d,ebx
299 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
300 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
301 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
302 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
303 LONG $0x20048d47 // lea r8d,[r8+r12*1]
304 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
305 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
306 WORD $0x3141; BYTE $0xfd // xor r13d,edi
307 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
308 LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
309 LONG $0x20048d47 // lea r8d,[r8+r12*1]
310 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
311 WORD $0x8944; BYTE $0xcf // mov edi,r9d
312 LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6
313 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
314 LONG $0x28048d47 // lea r8d,[r8+r13*1]
315 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
316 LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0]
317 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
318 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
319 LONG $0x00048d42 // lea eax,[rax+r8*1]
320 WORD $0x2141; BYTE $0xff // and r15d,edi
321 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
322 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
323 WORD $0x3145; BYTE $0xee // xor r14d,r13d
324 LONG $0x38048d47 // lea r8d,[r8+r15*1]
325 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
326
327 LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
328 LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4
329
330 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0)
331 LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0]
332 WORD $0x2141; BYTE $0xc4 // and r12d,eax
333 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
334 LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4
335 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
336 LONG $0x30048d47 // lea r8d,[r8+r14*1]
337 LONG $0x22148d42 // lea edx,[rdx+r12*1]
338 LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
339 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
340 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
341 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
342 LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7
343 LONG $0x22148d42 // lea edx,[rdx+r12*1]
344 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
345 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
346 LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
347 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
348 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
349 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
350 LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
351 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
352 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
353 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
354 LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
355 WORD $0x2144; BYTE $0xff // and edi,r15d
356 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
357 WORD $0x3144; BYTE $0xcf // xor edi,r9d
358 LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa
359 WORD $0x3145; BYTE $0xee // xor r14d,r13d
360 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
361 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
362 LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
363
364 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4)
365 LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4]
366 WORD $0x2145; BYTE $0xdc // and r12d,r11d
367 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
368 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
369 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
370 LONG $0x32148d42 // lea edx,[rdx+r14*1]
371 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
372 LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
373 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
374 WORD $0x3141; BYTE $0xfd // xor r13d,edi
375 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
376 LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
377 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
378 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
379 WORD $0xd789 // mov edi,edx
380 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
381 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
382 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
383 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
384 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
385 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
386 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
387 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
388 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
389 WORD $0x2141; BYTE $0xff // and r15d,edi
390 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
391 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
392 LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4
393 WORD $0x3145; BYTE $0xee // xor r14d,r13d
394 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
395 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
396 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
397
398 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8)
399 LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8]
400 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
401 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
402 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
403 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
404 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
405 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
406 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
407 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
408 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
409 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
410 LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
411 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
412 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
413 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
414 LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6
415 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
416 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
417 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
418 LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50
419 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
420 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
421 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
422 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
423 WORD $0x2144; BYTE $0xff // and edi,r15d
424 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
425 WORD $0xd731 // xor edi,edx
426 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
427 WORD $0x3145; BYTE $0xee // xor r14d,r13d
428 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
429 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
430 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
431
432 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac)
433 LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac]
434 WORD $0x2145; BYTE $0xcc // and r12d,r9d
435 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
436 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
437 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
438 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
439 LONG $0x20048d42 // lea eax,[rax+r12*1]
440 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
441 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
442 WORD $0x3141; BYTE $0xfd // xor r13d,edi
443 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
444 LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
445 LONG $0x20048d42 // lea eax,[rax+r12*1]
446 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
447 WORD $0xdf89 // mov edi,ebx
448 LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6
449 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
450 LONG $0x28048d42 // lea eax,[rax+r13*1]
451 WORD $0xcf31 // xor edi,ecx
452 LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20]
453 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
454 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
455 LONG $0x00048d45 // lea r8d,[r8+rax*1]
456 WORD $0x2141; BYTE $0xff // and r15d,edi
457 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
458 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
459 WORD $0x3145; BYTE $0xee // xor r14d,r13d
460 LONG $0x38048d42 // lea eax,[rax+r15*1]
461 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
462
463 LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6
464
465 LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40]
466 LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4
467
468 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80)
469 LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80]
470 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
471 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
472 LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4
473 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
474 LONG $0x30048d42 // lea eax,[rax+r14*1]
475 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
476 LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
477 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
478 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
479 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
480 LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7
481 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
482 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
483 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
484 LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
485 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
486 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
487 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
488 LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
489 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
490 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
491 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
492 LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
493 WORD $0x2144; BYTE $0xff // and edi,r15d
494 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
495 WORD $0xdf31 // xor edi,ebx
496 LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa
497 WORD $0x3145; BYTE $0xee // xor r14d,r13d
498 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
499 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
500 LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
501
502 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84)
503 LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84]
504 WORD $0x2141; BYTE $0xd4 // and r12d,edx
505 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
506 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
507 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
508 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
509 LONG $0x22148d47 // lea r10d,[r10+r12*1]
510 LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
511 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
512 WORD $0x3141; BYTE $0xfd // xor r13d,edi
513 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
514 LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
515 LONG $0x22148d47 // lea r10d,[r10+r12*1]
516 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
517 WORD $0x8944; BYTE $0xdf // mov edi,r11d
518 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
519 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
520 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
521 WORD $0xc731 // xor edi,eax
522 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
523 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
524 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
525 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
526 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
527 WORD $0x2141; BYTE $0xff // and r15d,edi
528 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
529 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
530 LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4
531 WORD $0x3145; BYTE $0xee // xor r14d,r13d
532 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
533 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
534 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
535
536 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88)
537 LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88]
538 WORD $0x2141; BYTE $0xcc // and r12d,ecx
539 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
540 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
541 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
542 LONG $0x32148d47 // lea r10d,[r10+r14*1]
543 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
544 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
545 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
546 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
547 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
548 LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
549 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
550 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
551 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
552 LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6
553 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
554 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
555 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
556 LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50
557 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
558 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
559 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
560 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
561 WORD $0x2144; BYTE $0xff // and edi,r15d
562 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
563 WORD $0x3144; BYTE $0xdf // xor edi,r11d
564 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
565 WORD $0x3145; BYTE $0xee // xor r14d,r13d
566 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
567 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
568 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
569
570 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c)
571 LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c]
572 WORD $0x2141; BYTE $0xdc // and r12d,ebx
573 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
574 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
575 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
576 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
577 LONG $0x20048d47 // lea r8d,[r8+r12*1]
578 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
579 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
580 WORD $0x3141; BYTE $0xfd // xor r13d,edi
581 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
582 LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
583 LONG $0x20048d47 // lea r8d,[r8+r12*1]
584 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
585 WORD $0x8944; BYTE $0xcf // mov edi,r9d
586 LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6
587 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
588 LONG $0x28048d47 // lea r8d,[r8+r13*1]
589 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
590 LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40]
591 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
592 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
593 LONG $0x00048d42 // lea eax,[rax+r8*1]
594 WORD $0x2141; BYTE $0xff // and r15d,edi
595 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
596 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
597 WORD $0x3145; BYTE $0xee // xor r14d,r13d
598 LONG $0x38048d47 // lea r8d,[r8+r15*1]
599 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
600
601 LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
602 LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4
603
604 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0)
605 LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0]
606 WORD $0x2141; BYTE $0xc4 // and r12d,eax
607 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
608 LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4
609 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
610 LONG $0x30048d47 // lea r8d,[r8+r14*1]
611 LONG $0x22148d42 // lea edx,[rdx+r12*1]
612 LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
613 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
614 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
615 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
616 LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7
617 LONG $0x22148d42 // lea edx,[rdx+r12*1]
618 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
619 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
620 LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
621 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
622 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
623 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
624 LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
625 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
626 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
627 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
628 LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
629 WORD $0x2144; BYTE $0xff // and edi,r15d
630 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
631 WORD $0x3144; BYTE $0xcf // xor edi,r9d
632 LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa
633 WORD $0x3145; BYTE $0xee // xor r14d,r13d
634 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
635 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
636 LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
637
638 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4)
639 LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4]
640 WORD $0x2145; BYTE $0xdc // and r12d,r11d
641 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
642 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
643 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
644 LONG $0x32148d42 // lea edx,[rdx+r14*1]
645 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
646 LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
647 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
648 WORD $0x3141; BYTE $0xfd // xor r13d,edi
649 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
650 LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
651 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
652 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
653 WORD $0xd789 // mov edi,edx
654 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
655 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
656 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
657 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
658 LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
659 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
660 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
661 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
662 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
663 WORD $0x2141; BYTE $0xff // and r15d,edi
664 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
665 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
666 LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4
667 WORD $0x3145; BYTE $0xee // xor r14d,r13d
668 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
669 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
670 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
671
672 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8)
673 LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8]
674 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
675 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
676 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
677 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
678 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
679 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
680 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
681 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
682 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
683 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
684 LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
685 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
686 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
687 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
688 LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6
689 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
690 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
691 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
692 LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50
693 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
694 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
695 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
696 LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
697 WORD $0x2144; BYTE $0xff // and edi,r15d
698 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
699 WORD $0xd731 // xor edi,edx
700 LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
701 WORD $0x3145; BYTE $0xee // xor r14d,r13d
702 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
703 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
704 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
705
706 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac)
707 LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac]
708 WORD $0x2145; BYTE $0xcc // and r12d,r9d
709 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
710 LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
711 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
712 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
713 LONG $0x20048d42 // lea eax,[rax+r12*1]
714 LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
715 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
716 WORD $0x3141; BYTE $0xfd // xor r13d,edi
717 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
718 LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
719 LONG $0x20048d42 // lea eax,[rax+r12*1]
720 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
721 WORD $0xdf89 // mov edi,ebx
722 LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6
723 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
724 LONG $0x28048d42 // lea eax,[rax+r13*1]
725 WORD $0xcf31 // xor edi,ecx
726 LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60]
727 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
728 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
729 LONG $0x00048d45 // lea r8d,[r8+rax*1]
730 WORD $0x2141; BYTE $0xff // and r15d,edi
731 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
732 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
733 WORD $0x3145; BYTE $0xee // xor r14d,r13d
734 LONG $0x38048d42 // lea eax,[rax+r15*1]
735 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
736
737 LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6
738 ADDQ $0x80, BP
739
740 CMPB 0x3(BP), $0x0
741 JNE loop1
742
743 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40)
744 LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40]
745 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
746 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
747 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
748 LONG $0x30048d42 // lea eax,[rax+r14*1]
749 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
750 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
751 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
752 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
753 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
754 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
755 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
756 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
757 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
758 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
759 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
760 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
761 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
762 WORD $0x2144; BYTE $0xff // and edi,r15d
763 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
764 WORD $0xdf31 // xor edi,ebx
765 WORD $0x3145; BYTE $0xee // xor r14d,r13d
766 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
767 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
768
769 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44)
770 LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44]
771 WORD $0x2141; BYTE $0xd4 // and r12d,edx
772 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
773 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
774 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
775 LONG $0x22148d47 // lea r10d,[r10+r12*1]
776 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
777 WORD $0x3141; BYTE $0xfd // xor r13d,edi
778 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
779 LONG $0x22148d47 // lea r10d,[r10+r12*1]
780 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
781 WORD $0x8944; BYTE $0xdf // mov edi,r11d
782 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
783 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
784 WORD $0xc731 // xor edi,eax
785 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
786 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
787 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
788 WORD $0x2141; BYTE $0xff // and r15d,edi
789 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
790 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
791 WORD $0x3145; BYTE $0xee // xor r14d,r13d
792 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
793 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
794
795 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48)
796 LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48]
797 WORD $0x2141; BYTE $0xcc // and r12d,ecx
798 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
799 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
800 LONG $0x32148d47 // lea r10d,[r10+r14*1]
801 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
802 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
803 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
804 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
805 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
806 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
807 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
808 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
809 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
810 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
811 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
812 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
813 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
814 WORD $0x2144; BYTE $0xff // and edi,r15d
815 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
816 WORD $0x3144; BYTE $0xdf // xor edi,r11d
817 WORD $0x3145; BYTE $0xee // xor r14d,r13d
818 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
819 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
820
821 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c)
822 LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c]
823 WORD $0x2141; BYTE $0xdc // and r12d,ebx
824 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
825 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
826 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
827 LONG $0x20048d47 // lea r8d,[r8+r12*1]
828 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
829 WORD $0x3141; BYTE $0xfd // xor r13d,edi
830 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
831 LONG $0x20048d47 // lea r8d,[r8+r12*1]
832 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
833 WORD $0x8944; BYTE $0xcf // mov edi,r9d
834 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
835 LONG $0x28048d47 // lea r8d,[r8+r13*1]
836 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
837 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
838 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
839 LONG $0x00048d42 // lea eax,[rax+r8*1]
840 WORD $0x2141; BYTE $0xff // and r15d,edi
841 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
842 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
843 WORD $0x3145; BYTE $0xee // xor r14d,r13d
844 LONG $0x38048d47 // lea r8d,[r8+r15*1]
845 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
846
847 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60)
848 LONG $0x60245403 // add edx,[rsp+0x60]
849 WORD $0x2141; BYTE $0xc4 // and r12d,eax
850 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
851 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
852 LONG $0x30048d47 // lea r8d,[r8+r14*1]
853 LONG $0x22148d42 // lea edx,[rdx+r12*1]
854 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
855 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
856 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
857 LONG $0x22148d42 // lea edx,[rdx+r12*1]
858 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
859 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
860 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
861 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
862 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
863 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
864 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
865 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
866 WORD $0x2144; BYTE $0xff // and edi,r15d
867 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
868 WORD $0x3144; BYTE $0xcf // xor edi,r9d
869 WORD $0x3145; BYTE $0xee // xor r14d,r13d
870 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
871 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
872
873 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64)
874 LONG $0x64244c03 // add ecx,[rsp+0x64]
875 WORD $0x2145; BYTE $0xdc // and r12d,r11d
876 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
877 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
878 LONG $0x32148d42 // lea edx,[rdx+r14*1]
879 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
880 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
881 WORD $0x3141; BYTE $0xfd // xor r13d,edi
882 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
883 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
884 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
885 WORD $0xd789 // mov edi,edx
886 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
887 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
888 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
889 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
890 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
891 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
892 WORD $0x2141; BYTE $0xff // and r15d,edi
893 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
894 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
895 WORD $0x3145; BYTE $0xee // xor r14d,r13d
896 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
897 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
898
899 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68)
900 LONG $0x68245c03 // add ebx,[rsp+0x68]
901 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
902 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
903 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
904 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
905 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
906 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
907 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
908 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
909 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
910 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
911 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
912 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
913 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
914 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
915 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
916 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
917 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
918 WORD $0x2144; BYTE $0xff // and edi,r15d
919 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
920 WORD $0xd731 // xor edi,edx
921 WORD $0x3145; BYTE $0xee // xor r14d,r13d
922 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
923 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
924
925 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c)
926 LONG $0x6c244403 // add eax,[rsp+0x6c]
927 WORD $0x2145; BYTE $0xcc // and r12d,r9d
928 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
929 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
930 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
931 LONG $0x20048d42 // lea eax,[rax+r12*1]
932 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
933 WORD $0x3141; BYTE $0xfd // xor r13d,edi
934 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
935 LONG $0x20048d42 // lea eax,[rax+r12*1]
936 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
937 WORD $0xdf89 // mov edi,ebx
938 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
939 LONG $0x28048d42 // lea eax,[rax+r13*1]
940 WORD $0xcf31 // xor edi,ecx
941 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
942 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
943 LONG $0x00048d45 // lea r8d,[r8+rax*1]
944 WORD $0x2141; BYTE $0xff // and r15d,edi
945 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
946 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
947 WORD $0x3145; BYTE $0xee // xor r14d,r13d
948 LONG $0x38048d42 // lea eax,[rax+r15*1]
949 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
950
951 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00)
952 LONG $0x241c0344 // add r11d,[rsp]
953 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
954 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
955 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
956 LONG $0x30048d42 // lea eax,[rax+r14*1]
957 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
958 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
959 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
960 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
961 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
962 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
963 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
964 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
965 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
966 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
967 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
968 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
969 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
970 WORD $0x2144; BYTE $0xff // and edi,r15d
971 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
972 WORD $0xdf31 // xor edi,ebx
973 WORD $0x3145; BYTE $0xee // xor r14d,r13d
974 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
975 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
976
977 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04)
978 LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4]
979 WORD $0x2141; BYTE $0xd4 // and r12d,edx
980 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
981 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
982 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
983 LONG $0x22148d47 // lea r10d,[r10+r12*1]
984 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
985 WORD $0x3141; BYTE $0xfd // xor r13d,edi
986 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
987 LONG $0x22148d47 // lea r10d,[r10+r12*1]
988 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
989 WORD $0x8944; BYTE $0xdf // mov edi,r11d
990 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
991 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
992 WORD $0xc731 // xor edi,eax
993 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
994 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
995 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
996 WORD $0x2141; BYTE $0xff // and r15d,edi
997 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
998 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
999 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1000 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
1001 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
1002
1003 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08)
1004 LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8]
1005 WORD $0x2141; BYTE $0xcc // and r12d,ecx
1006 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
1007 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
1008 LONG $0x32148d47 // lea r10d,[r10+r14*1]
1009 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
1010 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
1011 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1012 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
1013 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
1014 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1015 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
1016 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
1017 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
1018 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
1019 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
1020 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
1021 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
1022 WORD $0x2144; BYTE $0xff // and edi,r15d
1023 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1024 WORD $0x3144; BYTE $0xdf // xor edi,r11d
1025 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1026 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
1027 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
1028
1029 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c)
1030 LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc]
1031 WORD $0x2141; BYTE $0xdc // and r12d,ebx
1032 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
1033 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
1034 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
1035 LONG $0x20048d47 // lea r8d,[r8+r12*1]
1036 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
1037 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1038 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
1039 LONG $0x20048d47 // lea r8d,[r8+r12*1]
1040 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1041 WORD $0x8944; BYTE $0xcf // mov edi,r9d
1042 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
1043 LONG $0x28048d47 // lea r8d,[r8+r13*1]
1044 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
1045 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
1046 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
1047 LONG $0x00048d42 // lea eax,[rax+r8*1]
1048 WORD $0x2141; BYTE $0xff // and r15d,edi
1049 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1050 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
1051 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1052 LONG $0x38048d47 // lea r8d,[r8+r15*1]
1053 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
1054
1055 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20)
1056 LONG $0x20245403 // add edx,[rsp+0x20]
1057 WORD $0x2141; BYTE $0xc4 // and r12d,eax
1058 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
1059 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
1060 LONG $0x30048d47 // lea r8d,[r8+r14*1]
1061 LONG $0x22148d42 // lea edx,[rdx+r12*1]
1062 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
1063 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1064 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
1065 LONG $0x22148d42 // lea edx,[rdx+r12*1]
1066 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1067 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
1068 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
1069 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
1070 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
1071 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
1072 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
1073 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
1074 WORD $0x2144; BYTE $0xff // and edi,r15d
1075 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1076 WORD $0x3144; BYTE $0xcf // xor edi,r9d
1077 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1078 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
1079 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
1080
1081 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24)
1082 LONG $0x24244c03 // add ecx,[rsp+0x24]
1083 WORD $0x2145; BYTE $0xdc // and r12d,r11d
1084 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
1085 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
1086 LONG $0x32148d42 // lea edx,[rdx+r14*1]
1087 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
1088 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
1089 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1090 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
1091 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
1092 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1093 WORD $0xd789 // mov edi,edx
1094 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
1095 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
1096 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
1097 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
1098 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
1099 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
1100 WORD $0x2141; BYTE $0xff // and r15d,edi
1101 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1102 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
1103 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1104 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
1105 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
1106
1107 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28)
1108 LONG $0x28245c03 // add ebx,[rsp+0x28]
1109 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
1110 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
1111 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
1112 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
1113 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
1114 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
1115 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1116 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
1117 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
1118 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1119 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
1120 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
1121 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
1122 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
1123 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
1124 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
1125 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
1126 WORD $0x2144; BYTE $0xff // and edi,r15d
1127 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1128 WORD $0xd731 // xor edi,edx
1129 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1130 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
1131 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
1132
1133 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c)
1134 LONG $0x2c244403 // add eax,[rsp+0x2c]
1135 WORD $0x2145; BYTE $0xcc // and r12d,r9d
1136 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
1137 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
1138 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
1139 LONG $0x20048d42 // lea eax,[rax+r12*1]
1140 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
1141 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1142 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
1143 LONG $0x20048d42 // lea eax,[rax+r12*1]
1144 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1145 WORD $0xdf89 // mov edi,ebx
1146 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
1147 LONG $0x28048d42 // lea eax,[rax+r13*1]
1148 WORD $0xcf31 // xor edi,ecx
1149 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
1150 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
1151 LONG $0x00048d45 // lea r8d,[r8+rax*1]
1152 WORD $0x2141; BYTE $0xff // and r15d,edi
1153 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1154 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
1155 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1156 LONG $0x38048d42 // lea eax,[rax+r15*1]
1157 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
1158
1159 MOVQ 0x200(SP), DI // $_ctx
1160 ADDQ R14, AX
1161
1162 LEAQ 0x1c0(SP), BP
1163
1164 ADDL (DI), AX
1165 ADDL 4(DI), BX
1166 ADDL 8(DI), CX
1167 ADDL 12(DI), DX
1168 ADDL 16(DI), R8
1169 ADDL 20(DI), R9
1170 ADDL 24(DI), R10
1171 ADDL 28(DI), R11
1172
1173 MOVL AX, (DI)
1174 MOVL BX, 4(DI)
1175 MOVL CX, 8(DI)
1176 MOVL DX, 12(DI)
1177 MOVL R8, 16(DI)
1178 MOVL R9, 20(DI)
1179 MOVL R10, 24(DI)
1180 MOVL R11, 28(DI)
1181
1182 CMPQ SI, 0x50(BP) // $_end
1183 JE done
1184
1185 XORQ R14, R14
1186 MOVQ BX, DI
1187 XORQ CX, DI // magic
1188 MOVQ R9, R12
11891189
11901190 loop2:
1191 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10)
1192 LONG $0x105d0344 // add r11d,[rbp+0x10]
1193 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
1194 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
1195 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
1196 LONG $0x30048d42 // lea eax,[rax+r14*1]
1197 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
1198 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
1199 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1200 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
1201 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
1202 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1203 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
1204 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
1205 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
1206 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
1207 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
1208 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
1209 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
1210 WORD $0x2144; BYTE $0xff // and edi,r15d
1211 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1212 WORD $0xdf31 // xor edi,ebx
1213 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1214 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
1215 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
1216
1217 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14)
1218 LONG $0x14550344 // add r10d,[rbp+0x14]
1219 WORD $0x2141; BYTE $0xd4 // and r12d,edx
1220 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
1221 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
1222 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
1223 LONG $0x22148d47 // lea r10d,[r10+r12*1]
1224 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
1225 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1226 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
1227 LONG $0x22148d47 // lea r10d,[r10+r12*1]
1228 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1229 WORD $0x8944; BYTE $0xdf // mov edi,r11d
1230 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
1231 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
1232 WORD $0xc731 // xor edi,eax
1233 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
1234 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
1235 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
1236 WORD $0x2141; BYTE $0xff // and r15d,edi
1237 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1238 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
1239 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1240 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
1241 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
1242
1243 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18)
1244 LONG $0x184d0344 // add r9d,[rbp+0x18]
1245 WORD $0x2141; BYTE $0xcc // and r12d,ecx
1246 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
1247 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
1248 LONG $0x32148d47 // lea r10d,[r10+r14*1]
1249 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
1250 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
1251 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1252 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
1253 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
1254 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1255 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
1256 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
1257 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
1258 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
1259 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
1260 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
1261 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
1262 WORD $0x2144; BYTE $0xff // and edi,r15d
1263 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1264 WORD $0x3144; BYTE $0xdf // xor edi,r11d
1265 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1266 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
1267 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
1268
1269 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c)
1270 LONG $0x1c450344 // add r8d,[rbp+0x1c]
1271 WORD $0x2141; BYTE $0xdc // and r12d,ebx
1272 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
1273 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
1274 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
1275 LONG $0x20048d47 // lea r8d,[r8+r12*1]
1276 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
1277 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1278 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
1279 LONG $0x20048d47 // lea r8d,[r8+r12*1]
1280 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1281 WORD $0x8944; BYTE $0xcf // mov edi,r9d
1282 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
1283 LONG $0x28048d47 // lea r8d,[r8+r13*1]
1284 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
1285 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
1286 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
1287 LONG $0x00048d42 // lea eax,[rax+r8*1]
1288 WORD $0x2141; BYTE $0xff // and r15d,edi
1289 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1290 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
1291 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1292 LONG $0x38048d47 // lea r8d,[r8+r15*1]
1293 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
1294
1295 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30)
1296 WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30]
1297 WORD $0x2141; BYTE $0xc4 // and r12d,eax
1298 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
1299 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
1300 LONG $0x30048d47 // lea r8d,[r8+r14*1]
1301 LONG $0x22148d42 // lea edx,[rdx+r12*1]
1302 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
1303 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1304 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
1305 LONG $0x22148d42 // lea edx,[rdx+r12*1]
1306 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1307 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
1308 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
1309 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
1310 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
1311 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
1312 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
1313 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
1314 WORD $0x2144; BYTE $0xff // and edi,r15d
1315 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1316 WORD $0x3144; BYTE $0xcf // xor edi,r9d
1317 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1318 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
1319 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
1320
1321 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34)
1322 WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34]
1323 WORD $0x2145; BYTE $0xdc // and r12d,r11d
1324 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
1325 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
1326 LONG $0x32148d42 // lea edx,[rdx+r14*1]
1327 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
1328 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
1329 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1330 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
1331 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
1332 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1333 WORD $0xd789 // mov edi,edx
1334 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
1335 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
1336 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
1337 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
1338 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
1339 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
1340 WORD $0x2141; BYTE $0xff // and r15d,edi
1341 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1342 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
1343 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1344 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
1345 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
1346
1347 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38)
1348 WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38]
1349 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
1350 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
1351 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
1352 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
1353 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
1354 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
1355 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1356 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
1357 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
1358 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1359 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
1360 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
1361 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
1362 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
1363 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
1364 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
1365 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
1366 WORD $0x2144; BYTE $0xff // and edi,r15d
1367 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1368 WORD $0xd731 // xor edi,edx
1369 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1370 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
1371 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
1372
1373 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c)
1374 WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c]
1375 WORD $0x2145; BYTE $0xcc // and r12d,r9d
1376 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
1377 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
1378 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
1379 LONG $0x20048d42 // lea eax,[rax+r12*1]
1380 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
1381 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1382 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
1383 LONG $0x20048d42 // lea eax,[rax+r12*1]
1384 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1385 WORD $0xdf89 // mov edi,ebx
1386 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
1387 LONG $0x28048d42 // lea eax,[rax+r13*1]
1388 WORD $0xcf31 // xor edi,ecx
1389 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
1390 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
1391 LONG $0x00048d45 // lea r8d,[r8+rax*1]
1392 WORD $0x2141; BYTE $0xff // and r15d,edi
1393 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1394 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
1395 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1396 LONG $0x38048d42 // lea eax,[rax+r15*1]
1397 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
1398
1399 ADDQ $-0x40, BP
1400 CMPQ BP, SP
1401 JAE loop2
1402
1403 MOVQ 0x200(SP), DI // $_ctx
1404 ADDQ R14, AX
1405
1406 ADDQ $0x1c0, SP
1407
1408 ADDL (DI), AX
1409 ADDL 4(DI), BX
1410 ADDL 8(DI), CX
1411 ADDL 12(DI), DX
1412 ADDL 16(DI), R8
1413 ADDL 20(DI), R9
1414
1415 ADDQ $0x80, SI // input += 2
1416 ADDL 24(DI), R10
1417 MOVQ SI, R12
1418 ADDL 28(DI), R11
1419 CMPQ SI, 0x50(SP) // input == _end
1420
1421 MOVL AX, (DI)
1422 LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */
1423 MOVL AX, (DI)
1424 MOVL BX, 4(DI)
1425 MOVL CX, 8(DI)
1426 MOVL DX, 12(DI)
1427 MOVL R8, 16(DI)
1428 MOVL R9, 20(DI)
1429 MOVL R10, 24(DI)
1430 MOVL R11, 28(DI)
1431
1432 JBE loop0
1433 LEAQ (SP), BP
1191 // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10)
1192 LONG $0x105d0344 // add r11d,[rbp+0x10]
1193 WORD $0x2145; BYTE $0xc4 // and r12d,r8d
1194 LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
1195 LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
1196 LONG $0x30048d42 // lea eax,[rax+r14*1]
1197 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
1198 LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
1199 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1200 LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
1201 LONG $0x231c8d47 // lea r11d,[r11+r12*1]
1202 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1203 WORD $0x8941; BYTE $0xc7 // mov r15d,eax
1204 LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
1205 LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
1206 WORD $0x3141; BYTE $0xdf // xor r15d,ebx
1207 LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
1208 LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
1209 LONG $0x1a148d42 // lea edx,[rdx+r11*1]
1210 WORD $0x2144; BYTE $0xff // and edi,r15d
1211 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1212 WORD $0xdf31 // xor edi,ebx
1213 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1214 LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
1215 WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
1216
1217 // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14)
1218 LONG $0x14550344 // add r10d,[rbp+0x14]
1219 WORD $0x2141; BYTE $0xd4 // and r12d,edx
1220 LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
1221 LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
1222 LONG $0x331c8d47 // lea r11d,[r11+r14*1]
1223 LONG $0x22148d47 // lea r10d,[r10+r12*1]
1224 LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
1225 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1226 LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
1227 LONG $0x22148d47 // lea r10d,[r10+r12*1]
1228 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1229 WORD $0x8944; BYTE $0xdf // mov edi,r11d
1230 LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
1231 LONG $0x2a148d47 // lea r10d,[r10+r13*1]
1232 WORD $0xc731 // xor edi,eax
1233 LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
1234 LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
1235 LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
1236 WORD $0x2141; BYTE $0xff // and r15d,edi
1237 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1238 WORD $0x3141; BYTE $0xc7 // xor r15d,eax
1239 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1240 LONG $0x3a148d47 // lea r10d,[r10+r15*1]
1241 WORD $0x8941; BYTE $0xd4 // mov r12d,edx
1242
1243 // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18)
1244 LONG $0x184d0344 // add r9d,[rbp+0x18]
1245 WORD $0x2141; BYTE $0xcc // and r12d,ecx
1246 LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
1247 LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
1248 LONG $0x32148d47 // lea r10d,[r10+r14*1]
1249 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
1250 LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
1251 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1252 LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
1253 LONG $0x210c8d47 // lea r9d,[r9+r12*1]
1254 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1255 WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
1256 LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
1257 LONG $0x290c8d47 // lea r9d,[r9+r13*1]
1258 WORD $0x3145; BYTE $0xdf // xor r15d,r11d
1259 LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
1260 LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
1261 LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
1262 WORD $0x2144; BYTE $0xff // and edi,r15d
1263 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1264 WORD $0x3144; BYTE $0xdf // xor edi,r11d
1265 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1266 LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
1267 WORD $0x8941; BYTE $0xcc // mov r12d,ecx
1268
1269 // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c)
1270 LONG $0x1c450344 // add r8d,[rbp+0x1c]
1271 WORD $0x2141; BYTE $0xdc // and r12d,ebx
1272 LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
1273 LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
1274 LONG $0x310c8d47 // lea r9d,[r9+r14*1]
1275 LONG $0x20048d47 // lea r8d,[r8+r12*1]
1276 LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
1277 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1278 LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
1279 LONG $0x20048d47 // lea r8d,[r8+r12*1]
1280 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1281 WORD $0x8944; BYTE $0xcf // mov edi,r9d
1282 LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
1283 LONG $0x28048d47 // lea r8d,[r8+r13*1]
1284 WORD $0x3144; BYTE $0xd7 // xor edi,r10d
1285 LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
1286 LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
1287 LONG $0x00048d42 // lea eax,[rax+r8*1]
1288 WORD $0x2141; BYTE $0xff // and r15d,edi
1289 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1290 WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
1291 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1292 LONG $0x38048d47 // lea r8d,[r8+r15*1]
1293 WORD $0x8941; BYTE $0xdc // mov r12d,ebx
1294
1295 // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30)
1296 WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30]
1297 WORD $0x2141; BYTE $0xc4 // and r12d,eax
1298 LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
1299 LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
1300 LONG $0x30048d47 // lea r8d,[r8+r14*1]
1301 LONG $0x22148d42 // lea edx,[rdx+r12*1]
1302 LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
1303 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1304 LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
1305 LONG $0x22148d42 // lea edx,[rdx+r12*1]
1306 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1307 WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
1308 LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
1309 LONG $0x2a148d42 // lea edx,[rdx+r13*1]
1310 WORD $0x3145; BYTE $0xcf // xor r15d,r9d
1311 LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
1312 LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
1313 LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
1314 WORD $0x2144; BYTE $0xff // and edi,r15d
1315 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1316 WORD $0x3144; BYTE $0xcf // xor edi,r9d
1317 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1318 WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
1319 WORD $0x8941; BYTE $0xc4 // mov r12d,eax
1320
1321 // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34)
1322 WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34]
1323 WORD $0x2145; BYTE $0xdc // and r12d,r11d
1324 LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
1325 LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
1326 LONG $0x32148d42 // lea edx,[rdx+r14*1]
1327 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
1328 LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
1329 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1330 LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
1331 LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
1332 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1333 WORD $0xd789 // mov edi,edx
1334 LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
1335 LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
1336 WORD $0x3144; BYTE $0xc7 // xor edi,r8d
1337 LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
1338 LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
1339 LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
1340 WORD $0x2141; BYTE $0xff // and r15d,edi
1341 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1342 WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
1343 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1344 LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
1345 WORD $0x8945; BYTE $0xdc // mov r12d,r11d
1346
1347 // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38)
1348 WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38]
1349 WORD $0x2145; BYTE $0xd4 // and r12d,r10d
1350 LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
1351 LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
1352 LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
1353 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
1354 LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
1355 WORD $0x3145; BYTE $0xfd // xor r13d,r15d
1356 LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
1357 LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
1358 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1359 WORD $0x8941; BYTE $0xcf // mov r15d,ecx
1360 LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
1361 LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
1362 WORD $0x3141; BYTE $0xd7 // xor r15d,edx
1363 LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
1364 LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
1365 LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
1366 WORD $0x2144; BYTE $0xff // and edi,r15d
1367 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1368 WORD $0xd731 // xor edi,edx
1369 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1370 WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
1371 WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
1372
1373 // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c)
1374 WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c]
1375 WORD $0x2145; BYTE $0xcc // and r12d,r9d
1376 LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
1377 LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
1378 LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
1379 LONG $0x20048d42 // lea eax,[rax+r12*1]
1380 LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
1381 WORD $0x3141; BYTE $0xfd // xor r13d,edi
1382 LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
1383 LONG $0x20048d42 // lea eax,[rax+r12*1]
1384 WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
1385 WORD $0xdf89 // mov edi,ebx
1386 LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
1387 LONG $0x28048d42 // lea eax,[rax+r13*1]
1388 WORD $0xcf31 // xor edi,ecx
1389 LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
1390 LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
1391 LONG $0x00048d45 // lea r8d,[r8+rax*1]
1392 WORD $0x2141; BYTE $0xff // and r15d,edi
1393 WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
1394 WORD $0x3141; BYTE $0xcf // xor r15d,ecx
1395 WORD $0x3145; BYTE $0xee // xor r14d,r13d
1396 LONG $0x38048d42 // lea eax,[rax+r15*1]
1397 WORD $0x8945; BYTE $0xcc // mov r12d,r9d
1398
1399 ADDQ $-0x40, BP
1400 CMPQ BP, SP
1401 JAE loop2
1402
1403 MOVQ 0x200(SP), DI // $_ctx
1404 ADDQ R14, AX
1405
1406 ADDQ $0x1c0, SP
1407
1408 ADDL (DI), AX
1409 ADDL 4(DI), BX
1410 ADDL 8(DI), CX
1411 ADDL 12(DI), DX
1412 ADDL 16(DI), R8
1413 ADDL 20(DI), R9
1414
1415 ADDQ $0x80, SI // input += 2
1416 ADDL 24(DI), R10
1417 MOVQ SI, R12
1418 ADDL 28(DI), R11
1419 CMPQ SI, 0x50(SP) // input == _end
1420
1421 MOVL AX, (DI)
1422 LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */
1423 MOVL AX, (DI)
1424 MOVL BX, 4(DI)
1425 MOVL CX, 8(DI)
1426 MOVL DX, 12(DI)
1427 MOVL R8, 16(DI)
1428 MOVL R9, 20(DI)
1429 MOVL R10, 24(DI)
1430 MOVL R11, 28(DI)
1431
1432 JBE loop0
1433 LEAQ (SP), BP
14341434
14351435 done:
1436 MOVQ BP, SP
1437 MOVQ 0x58(SP), SP
1438 WORD $0xf8c5; BYTE $0x77 // vzeroupper
1439
1440 RET
1441
1436 MOVQ BP, SP
1437 MOVQ 0x58(SP), SP
1438 WORD $0xf8c5; BYTE $0x77 // vzeroupper
1439
1440 RET
1441
2727 )
2828
2929 //go:noescape
30 func sha256_x16_avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
31
32 // Do not start at 0 but next multiple of 16 so as to be able to
30 func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
31
32 // Avx512ServerUID - Do not start at 0 but next multiple of 16 so as to be able to
3333 // differentiate from the default initialization value of 0
34 const Avx512ServerUid = 16
34 const Avx512ServerUID = 16
3535
3636 var uidCounter uint64
3737
38 // NewAvx512 - initialize sha256 Avx512 implementation.
3839 func NewAvx512(a512srv *Avx512Server) hash.Hash {
3940 uid := atomic.AddUint64(&uidCounter, 1)
4041 return &Avx512Digest{uid: uid, a512srv: a512srv}
4142 }
4243
43 // Type for computing SHA256 using AVX51
44 // Avx512Digest - Type for computing SHA256 using Avx512
4445 type Avx512Digest struct {
4546 uid uint64
4647 a512srv *Avx512Server
5152 result [Size]byte
5253 }
5354
54 // Return size of checksum
55 // Size - Return size of checksum
5556 func (d *Avx512Digest) Size() int { return Size }
5657
57 // Return blocksize of checksum
58 // BlockSize - Return blocksize of checksum
5859 func (d Avx512Digest) BlockSize() int { return BlockSize }
5960
61 // Reset - reset sha digest to its initial values
6062 func (d *Avx512Digest) Reset() {
6163 d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true}
6264 d.nx = 0
6870 func (d *Avx512Digest) Write(p []byte) (nn int, err error) {
6971
7072 if d.final {
71 return 0, errors.New("Avx512Digest already finalized. Reset first before writing again.")
73 return 0, errors.New("Avx512Digest already finalized. Reset first before writing again")
7274 }
7375
7476 nn = len(p)
9395 return
9496 }
9597
96 // Return sha256 sum in bytes
98 // Sum - Return sha256 sum in bytes
9799 func (d *Avx512Digest) Sum(in []byte) (result []byte) {
98100
99101 if d.final {
261263 func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte {
262264
263265 scratch := [512]byte{}
264 sha256_x16_avx512(digests, &scratch, &table, mask, input)
266 sha256X16Avx512(digests, &scratch, &table, mask, input)
265267
266268 output := [16][Size]byte{}
267269 for i := 0; i < 16; i++ {
289291 sumCh chan [Size]byte
290292 }
291293
292 // Type to implement 16x parallel handling of SHA256 invocations
294 // Avx512Server - Type to implement 16x parallel handling of SHA256 invocations
293295 type Avx512Server struct {
294296 blocksCh chan blockInput // Input channel
295297 totalIn int // Total number of inputs waiting to be processed
297299 digests map[uint64][Size]byte // Map of uids to (interim) digest results
298300 }
299301
300 // Info for each lane
302 // Avx512LaneInfo - Info for each lane
301303 type Avx512LaneInfo struct {
302304 uid uint64 // unique identification for this SHA processing
303305 block []byte // input block to be processed
304306 outputCh chan [Size]byte // channel for output result
305307 }
306308
307 // Create new object for parallel processing handling
309 // NewAvx512Server - Create new object for parallel processing handling
308310 func NewAvx512Server() *Avx512Server {
309311 a512srv := &Avx512Server{}
310312 a512srv.digests = make(map[uint64][Size]byte)
315317 return a512srv
316318 }
317319
318 // Sole handler for reading from the input channel
320 // Process - Sole handler for reading from the input channel
319321 func (a512srv *Avx512Server) Process() {
320322 for {
321323 select {
362364 if lane.uid == uid {
363365 if lane.block != nil {
364366 a512srv.lanes[i] = Avx512LaneInfo{} // clear message
365 a512srv.totalIn -= 1
367 a512srv.totalIn--
366368 }
367369 }
368370 }
402404 return len(p), nil
403405 }
404406
407 // Sum - return sha256 sum in bytes for a given sum id.
405408 func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte {
406409 sumCh := make(chan [32]byte)
407410 a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh}
0 TEXT ·sha256_x16_avx512(SB), 7, $0
1 MOVQ digests+0(FP), DI
2 MOVQ scratch+8(FP), R12
3 MOVQ mask_len+32(FP), SI
4 MOVQ r14+24(FP), R13
5 MOVQ (R13), R14
6 LONG $0x92fbc1c4; BYTE $0xce
7 LEAQ inputs+48(FP), AX
8 QUAD $0xf162076f487ef162; QUAD $0x7ef162014f6f487e; QUAD $0x487ef16202576f48; QUAD $0x6f487ef162035f6f; QUAD $0x6f6f487ef1620467; QUAD $0x06776f487ef16205; LONG $0x487ef162; WORD $0x7f6f; BYTE $0x07
9 MOVQ table+16(FP), DX
10 WORD $0x3148; BYTE $0xc9
0 TEXT ·sha256X16Avx512(SB), 7, $0
1 MOVQ digests+0(FP), DI
2 MOVQ scratch+8(FP), R12
3 MOVQ mask_len+32(FP), SI
4 MOVQ r14+24(FP), R13
5 MOVQ (R13), R14
6 LONG $0x92fbc1c4; BYTE $0xce
7 LEAQ inputs+48(FP), AX
8 QUAD $0xf162076f487ef162; QUAD $0x7ef162014f6f487e; QUAD $0x487ef16202576f48; QUAD $0x6f487ef162035f6f; QUAD $0x6f6f487ef1620467; QUAD $0x06776f487ef16205; LONG $0x487ef162; WORD $0x7f6f; BYTE $0x07
9 MOVQ table+16(FP), DX
10 WORD $0x3148; BYTE $0xc9
1111 TESTQ $(1<<0), R14
12 JE skipInput0
13 MOVQ 0*24(AX), R9
14 LONG $0x487cc162; WORD $0x0410; BYTE $0x09
12 JE skipInput0
13 MOVQ 0*24(AX), R9
14 LONG $0x487cc162; WORD $0x0410; BYTE $0x09
15
1516 skipInput0:
1617 TESTQ $(1<<1), R14
17 JE skipInput1
18 MOVQ 1*24(AX), R9
19 LONG $0x487cc162; WORD $0x0c10; BYTE $0x09
18 JE skipInput1
19 MOVQ 1*24(AX), R9
20 LONG $0x487cc162; WORD $0x0c10; BYTE $0x09
21
2022 skipInput1:
2123 TESTQ $(1<<2), R14
22 JE skipInput2
23 MOVQ 2*24(AX), R9
24 LONG $0x487cc162; WORD $0x1410; BYTE $0x09
24 JE skipInput2
25 MOVQ 2*24(AX), R9
26 LONG $0x487cc162; WORD $0x1410; BYTE $0x09
27
2528 skipInput2:
2629 TESTQ $(1<<3), R14
27 JE skipInput3
28 MOVQ 3*24(AX), R9
29 LONG $0x487cc162; WORD $0x1c10; BYTE $0x09
30 JE skipInput3
31 MOVQ 3*24(AX), R9
32 LONG $0x487cc162; WORD $0x1c10; BYTE $0x09
33
3034 skipInput3:
3135 TESTQ $(1<<4), R14
32 JE skipInput4
33 MOVQ 4*24(AX), R9
34 LONG $0x487cc162; WORD $0x2410; BYTE $0x09
36 JE skipInput4
37 MOVQ 4*24(AX), R9
38 LONG $0x487cc162; WORD $0x2410; BYTE $0x09
39
3540 skipInput4:
3641 TESTQ $(1<<5), R14
37 JE skipInput5
38 MOVQ 5*24(AX), R9
39 LONG $0x487cc162; WORD $0x2c10; BYTE $0x09
42 JE skipInput5
43 MOVQ 5*24(AX), R9
44 LONG $0x487cc162; WORD $0x2c10; BYTE $0x09
45
4046 skipInput5:
4147 TESTQ $(1<<6), R14
42 JE skipInput6
43 MOVQ 6*24(AX), R9
44 LONG $0x487cc162; WORD $0x3410; BYTE $0x09
48 JE skipInput6
49 MOVQ 6*24(AX), R9
50 LONG $0x487cc162; WORD $0x3410; BYTE $0x09
51
4552 skipInput6:
4653 TESTQ $(1<<7), R14
47 JE skipInput7
48 MOVQ 7*24(AX), R9
49 LONG $0x487cc162; WORD $0x3c10; BYTE $0x09
54 JE skipInput7
55 MOVQ 7*24(AX), R9
56 LONG $0x487cc162; WORD $0x3c10; BYTE $0x09
57
5058 skipInput7:
5159 TESTQ $(1<<8), R14
52 JE skipInput8
53 MOVQ 8*24(AX), R9
54 LONG $0x487c4162; WORD $0x0410; BYTE $0x09
60 JE skipInput8
61 MOVQ 8*24(AX), R9
62 LONG $0x487c4162; WORD $0x0410; BYTE $0x09
63
5564 skipInput8:
5665 TESTQ $(1<<9), R14
57 JE skipInput9
58 MOVQ 9*24(AX), R9
59 LONG $0x487c4162; WORD $0x0c10; BYTE $0x09
66 JE skipInput9
67 MOVQ 9*24(AX), R9
68 LONG $0x487c4162; WORD $0x0c10; BYTE $0x09
69
6070 skipInput9:
6171 TESTQ $(1<<10), R14
62 JE skipInput10
63 MOVQ 10*24(AX), R9
64 LONG $0x487c4162; WORD $0x1410; BYTE $0x09
72 JE skipInput10
73 MOVQ 10*24(AX), R9
74 LONG $0x487c4162; WORD $0x1410; BYTE $0x09
75
6576 skipInput10:
6677 TESTQ $(1<<11), R14
67 JE skipInput11
68 MOVQ 11*24(AX), R9
69 LONG $0x487c4162; WORD $0x1c10; BYTE $0x09
78 JE skipInput11
79 MOVQ 11*24(AX), R9
80 LONG $0x487c4162; WORD $0x1c10; BYTE $0x09
81
7082 skipInput11:
7183 TESTQ $(1<<12), R14
72 JE skipInput12
73 MOVQ 12*24(AX), R9
74 LONG $0x487c4162; WORD $0x2410; BYTE $0x09
84 JE skipInput12
85 MOVQ 12*24(AX), R9
86 LONG $0x487c4162; WORD $0x2410; BYTE $0x09
87
7588 skipInput12:
7689 TESTQ $(1<<13), R14
77 JE skipInput13
78 MOVQ 13*24(AX), R9
79 LONG $0x487c4162; WORD $0x2c10; BYTE $0x09
90 JE skipInput13
91 MOVQ 13*24(AX), R9
92 LONG $0x487c4162; WORD $0x2c10; BYTE $0x09
93
8094 skipInput13:
8195 TESTQ $(1<<14), R14
82 JE skipInput14
83 MOVQ 14*24(AX), R9
84 LONG $0x487c4162; WORD $0x3410; BYTE $0x09
96 JE skipInput14
97 MOVQ 14*24(AX), R9
98 LONG $0x487c4162; WORD $0x3410; BYTE $0x09
99
85100 skipInput14:
86101 TESTQ $(1<<15), R14
87 JE skipInput15
88 MOVQ 15*24(AX), R9
89 LONG $0x487c4162; WORD $0x3c10; BYTE $0x09
102 JE skipInput15
103 MOVQ 15*24(AX), R9
104 LONG $0x487c4162; WORD $0x3c10; BYTE $0x09
105
90106 skipInput15:
91107 lloop:
92 LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), DX
93 LONG $0x487e7162; WORD $0x1a6f
94 MOVQ table+16(FP), DX
95 QUAD $0xd162226f487e7162; QUAD $0x7ed16224047f487e; QUAD $0x7ed16201244c7f48; QUAD $0x7ed1620224547f48; QUAD $0x7ed16203245c7f48; QUAD $0x7ed1620424647f48; QUAD $0x7ed16205246c7f48; QUAD $0x7ed1620624747f48; QUAD $0xc1834807247c7f48; QUAD $0x44c9c6407c316240; QUAD $0x62eec1c6407ca162; QUAD $0xa16244d3c6406c31; QUAD $0x34c162eed3c6406c; QUAD $0x407ca162dddac648; QUAD $0xc6407ca16288cac6; QUAD $0xcac648345162ddc2; QUAD $0x44d5c6405ca16288; QUAD $0x62eee5c6405ca162; QUAD $0xa16244d7c6404c31; QUAD $0x6cc162eef7c6404c; QUAD $0x405ca162ddfac640; QUAD $0xc6405ca16288eec6; QUAD $0xd2c6406cc162dde6; QUAD $0x44f1c6403c816288; QUAD $0x62eec1c6403c0162; QUAD $0x016244d3c6402c11; QUAD $0x4c4162eed3c6402c; QUAD $0x403c0162dddac640; QUAD $0xc6403c016288cac6; QUAD $0xf2c6404cc162ddc2; QUAD $0x44d5c6401c016288; QUAD $0x62eee5c6401c0162; QUAD $0x016244d7c6400c11; QUAD $0x2c4162eef7c6400c; QUAD $0x401c0162ddfac640; QUAD $0xc6401c016288eec6; QUAD $0xd2c6402c4162dde6; BYTE $0x88
96 LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX
97 LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8
98 QUAD $0x2262336f487e6162; QUAD $0x487e5162f27648b5; QUAD $0xd27648b53262106f; QUAD $0xa262136f487ee162; QUAD $0x487e5162d77640e5; QUAD $0xcf7640e53262086f; QUAD $0xa2621b6f487ee162; QUAD $0x487ec162dd7640f5; QUAD $0xfd7640f5a262386f; QUAD $0xa2620b6f487ee162; QUAD $0x487ec162cc7640fd; QUAD $0xec7640fda262286f; QUAD $0x8262036f487ee162; QUAD $0x487ec162c27640cd; QUAD $0xe27640cd8262206f; QUAD $0x8262336f487ee162; QUAD $0x487e4162f77640a5; QUAD $0xd77640a50262106f; QUAD $0x02621b6f487e6162; QUAD $0x487e4162dd7640b5; QUAD $0xfd7640b50262386f; QUAD $0x02620b6f487e6162; QUAD $0x487e4162cc7640bd; QUAD $0xec7640bd0262286f; QUAD $0x62eec023408d2362; QUAD $0x236244c023408da3; QUAD $0xada362eee42348ad; QUAD $0x40c5036244e42348; QUAD $0x2340c51362eef723; QUAD $0xfd2340d5036244d7; QUAD $0x44fd2340d58362ee; QUAD $0x62eeea2348b50362; QUAD $0x036244ea2348b583; QUAD $0xe51362eed32340e5; QUAD $0x40f5036244cb2340; QUAD $0x2340f58362eed923; QUAD $0xce2340ed236244d9; QUAD $0x44ce2340eda362ee; QUAD $0xc162d16f487ec162; QUAD $0x407dc262f26f487e; QUAD $0xcb004075c262c300; QUAD $0xc262d300406dc262; QUAD $0x405dc262db004065; QUAD $0xeb004055c262e300; QUAD $0xc262f300404dc262; QUAD $0x403d4262fb004045; QUAD $0xcb0040354262c300; QUAD $0x4262d300402d4262; QUAD $0x401d4262db004025; QUAD $0xeb0040154262e300; QUAD $0x4262f300400d4262; QUAD $0x48455162fb004005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6201626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD 
$0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916202626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16203; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16204626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; 
QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16205626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x06626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16207626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD 
$0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1620862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6209626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1620a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; 
QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591620b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91620c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591620d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD 
$0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x0e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591620f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591621062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; 
QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6211626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916212626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16213; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD 
$0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16214626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16215626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x16626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; 
QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16217626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1621862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6219626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD 
$0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1621a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591621b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91621c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; 
QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591621d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x1e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591621f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD 
$0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591622062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6221626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916222626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; 
QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16223; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16224626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16225626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD 
$0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x26626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16227626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1622862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; 
QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6229626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1622a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591622b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD 
$0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91622c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591622d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x2e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; 
QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591622f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591623062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x01ee8348fdfe4005
99 JE lastLoop
100 ADDQ $8, R13
101 MOVQ (R13), R14
102 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x31
103 TESTQ $(1<<0), R14
104 JE skipNext0
105 MOVQ 0*24(AX), R9
106 LONG $0x487cc162; WORD $0x0410; BYTE $0x09
108 LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), DX
109 LONG $0x487e7162; WORD $0x1a6f
110 MOVQ table+16(FP), DX
111 QUAD $0xd162226f487e7162; QUAD $0x7ed16224047f487e; QUAD $0x7ed16201244c7f48; QUAD $0x7ed1620224547f48; QUAD $0x7ed16203245c7f48; QUAD $0x7ed1620424647f48; QUAD $0x7ed16205246c7f48; QUAD $0x7ed1620624747f48; QUAD $0xc1834807247c7f48; QUAD $0x44c9c6407c316240; QUAD $0x62eec1c6407ca162; QUAD $0xa16244d3c6406c31; QUAD $0x34c162eed3c6406c; QUAD $0x407ca162dddac648; QUAD $0xc6407ca16288cac6; QUAD $0xcac648345162ddc2; QUAD $0x44d5c6405ca16288; QUAD $0x62eee5c6405ca162; QUAD $0xa16244d7c6404c31; QUAD $0x6cc162eef7c6404c; QUAD $0x405ca162ddfac640; QUAD $0xc6405ca16288eec6; QUAD $0xd2c6406cc162dde6; QUAD $0x44f1c6403c816288; QUAD $0x62eec1c6403c0162; QUAD $0x016244d3c6402c11; QUAD $0x4c4162eed3c6402c; QUAD $0x403c0162dddac640; QUAD $0xc6403c016288cac6; QUAD $0xf2c6404cc162ddc2; QUAD $0x44d5c6401c016288; QUAD $0x62eee5c6401c0162; QUAD $0x016244d7c6400c11; QUAD $0x2c4162eef7c6400c; QUAD $0x401c0162ddfac640; QUAD $0xc6401c016288eec6; QUAD $0xd2c6402c4162dde6; BYTE $0x88
112 LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX
113 LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8
114 QUAD $0x2262336f487e6162; QUAD $0x487e5162f27648b5; QUAD $0xd27648b53262106f; QUAD $0xa262136f487ee162; QUAD $0x487e5162d77640e5; QUAD $0xcf7640e53262086f; QUAD $0xa2621b6f487ee162; QUAD $0x487ec162dd7640f5; QUAD $0xfd7640f5a262386f; QUAD $0xa2620b6f487ee162; QUAD $0x487ec162cc7640fd; QUAD $0xec7640fda262286f; QUAD $0x8262036f487ee162; QUAD $0x487ec162c27640cd; QUAD $0xe27640cd8262206f; QUAD $0x8262336f487ee162; QUAD $0x487e4162f77640a5; QUAD $0xd77640a50262106f; QUAD $0x02621b6f487e6162; QUAD $0x487e4162dd7640b5; QUAD $0xfd7640b50262386f; QUAD $0x02620b6f487e6162; QUAD $0x487e4162cc7640bd; QUAD $0xec7640bd0262286f; QUAD $0x62eec023408d2362; QUAD $0x236244c023408da3; QUAD $0xada362eee42348ad; QUAD $0x40c5036244e42348; QUAD $0x2340c51362eef723; QUAD $0xfd2340d5036244d7; QUAD $0x44fd2340d58362ee; QUAD $0x62eeea2348b50362; QUAD $0x036244ea2348b583; QUAD $0xe51362eed32340e5; QUAD $0x40f5036244cb2340; QUAD $0x2340f58362eed923; QUAD $0xce2340ed236244d9; QUAD $0x44ce2340eda362ee; QUAD $0xc162d16f487ec162; QUAD $0x407dc262f26f487e; QUAD $0xcb004075c262c300; QUAD $0xc262d300406dc262; QUAD $0x405dc262db004065; QUAD $0xeb004055c262e300; QUAD $0xc262f300404dc262; QUAD $0x403d4262fb004045; QUAD $0xcb0040354262c300; QUAD $0x4262d300402d4262; QUAD $0x401d4262db004025; QUAD $0xeb0040154262e300; QUAD $0x4262f300400d4262; QUAD $0x48455162fb004005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6201626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD 
$0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916202626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16203; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16204626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; 
QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16205626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x06626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16207626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD 
$0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1620862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6209626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1620a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; 
QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591620b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91620c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591620d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD 
$0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x0e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591620f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591621062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; 
QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6211626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916212626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16213; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD 
$0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16214626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16215626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x16626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; 
QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16217626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1621862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6219626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD 
$0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1621a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591621b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91621c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; 
QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591621d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x1e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591621f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD 
$0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591622062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6221626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916222626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; 
QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16223; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16224626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16225626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD 
$0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x26626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16227626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1622862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; 
QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6229626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1622a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591622b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD 
$0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91622c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591622d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x2e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; 
QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591622f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591623062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x01ee8348fdfe4005
115 JE lastLoop
116 ADDQ $8, R13
117 MOVQ (R13), R14
118 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x31
119 TESTQ $(1<<0), R14
120 JE skipNext0
121 MOVQ 0*24(AX), R9
122 LONG $0x487cc162; WORD $0x0410; BYTE $0x09
123
107124 skipNext0:
108 QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x32
125 QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x32
109126 TESTQ $(1<<1), R14
110 JE skipNext1
111 MOVQ 1*24(AX), R9
112 LONG $0x487cc162; WORD $0x0c10; BYTE $0x09
127 JE skipNext1
128 MOVQ 1*24(AX), R9
129 LONG $0x487cc162; WORD $0x0c10; BYTE $0x09
130
113131 skipNext1:
114 QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x33
132 QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x33
115133 TESTQ $(1<<2), R14
116 JE skipNext2
117 MOVQ 2*24(AX), R9
118 LONG $0x487cc162; WORD $0x1410; BYTE $0x09
134 JE skipNext2
135 MOVQ 2*24(AX), R9
136 LONG $0x487cc162; WORD $0x1410; BYTE $0x09
137
119138 skipNext2:
120 QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x34
139 QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x34
121140 TESTQ $(1<<3), R14
122 JE skipNext3
123 MOVQ 3*24(AX), R9
124 LONG $0x487cc162; WORD $0x1c10; BYTE $0x09
141 JE skipNext3
142 MOVQ 3*24(AX), R9
143 LONG $0x487cc162; WORD $0x1c10; BYTE $0x09
144
125145 skipNext3:
126 QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x35
146 QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x35
127147 TESTQ $(1<<4), R14
128 JE skipNext4
129 MOVQ 4*24(AX), R9
130 LONG $0x487cc162; WORD $0x2410; BYTE $0x09
148 JE skipNext4
149 MOVQ 4*24(AX), R9
150 LONG $0x487cc162; WORD $0x2410; BYTE $0x09
151
131152 skipNext4:
132 QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x36
153 QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x36
133154 TESTQ $(1<<5), R14
134 JE skipNext5
135 MOVQ 5*24(AX), R9
136 LONG $0x487cc162; WORD $0x2c10; BYTE $0x09
155 JE skipNext5
156 MOVQ 5*24(AX), R9
157 LONG $0x487cc162; WORD $0x2c10; BYTE $0x09
158
137159 skipNext5:
138 QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x37
160 QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x37
139161 TESTQ $(1<<6), R14
140 JE skipNext6
141 MOVQ 6*24(AX), R9
142 LONG $0x487cc162; WORD $0x3410; BYTE $0x09
162 JE skipNext6
163 MOVQ 6*24(AX), R9
164 LONG $0x487cc162; WORD $0x3410; BYTE $0x09
165
143166 skipNext6:
144 QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x38
167 QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x38
145168 TESTQ $(1<<7), R14
146 JE skipNext7
147 MOVQ 7*24(AX), R9
148 LONG $0x487cc162; WORD $0x3c10; BYTE $0x09
169 JE skipNext7
170 MOVQ 7*24(AX), R9
171 LONG $0x487cc162; WORD $0x3c10; BYTE $0x09
172
149173 skipNext7:
150 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x39
174 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x39
151175 TESTQ $(1<<8), R14
152 JE skipNext8
153 MOVQ 8*24(AX), R9
154 LONG $0x487c4162; WORD $0x0410; BYTE $0x09
176 JE skipNext8
177 MOVQ 8*24(AX), R9
178 LONG $0x487c4162; WORD $0x0410; BYTE $0x09
179
155180 skipNext8:
156 QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x3a
181 QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x3a
157182 TESTQ $(1<<9), R14
158 JE skipNext9
159 MOVQ 9*24(AX), R9
160 LONG $0x487c4162; WORD $0x0c10; BYTE $0x09
183 JE skipNext9
184 MOVQ 9*24(AX), R9
185 LONG $0x487c4162; WORD $0x0c10; BYTE $0x09
186
161187 skipNext9:
162 QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x3b
188 QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x3b
163189 TESTQ $(1<<10), R14
164 JE skipNext10
165 MOVQ 10*24(AX), R9
166 LONG $0x487c4162; WORD $0x1410; BYTE $0x09
190 JE skipNext10
191 MOVQ 10*24(AX), R9
192 LONG $0x487c4162; WORD $0x1410; BYTE $0x09
193
167194 skipNext10:
168 QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x3c
195 QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x3c
169196 TESTQ $(1<<11), R14
170 JE skipNext11
171 MOVQ 11*24(AX), R9
172 LONG $0x487c4162; WORD $0x1c10; BYTE $0x09
197 JE skipNext11
198 MOVQ 11*24(AX), R9
199 LONG $0x487c4162; WORD $0x1c10; BYTE $0x09
200
173201 skipNext11:
174 QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x3d
202 QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x3d
175203 TESTQ $(1<<12), R14
176 JE skipNext12
177 MOVQ 12*24(AX), R9
178 LONG $0x487c4162; WORD $0x2410; BYTE $0x09
204 JE skipNext12
205 MOVQ 12*24(AX), R9
206 LONG $0x487c4162; WORD $0x2410; BYTE $0x09
207
179208 skipNext12:
180 QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x3e
209 QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x3e
181210 TESTQ $(1<<13), R14
182 JE skipNext13
183 MOVQ 13*24(AX), R9
184 LONG $0x487c4162; WORD $0x2c10; BYTE $0x09
211 JE skipNext13
212 MOVQ 13*24(AX), R9
213 LONG $0x487c4162; WORD $0x2c10; BYTE $0x09
214
185215 skipNext13:
186 QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x3f
216 QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x3f
187217 TESTQ $(1<<14), R14
188 JE skipNext14
189 MOVQ 14*24(AX), R9
190 LONG $0x487c4162; WORD $0x3410; BYTE $0x09
218 JE skipNext14
219 MOVQ 14*24(AX), R9
220 LONG $0x487c4162; WORD $0x3410; BYTE $0x09
221
191222 skipNext14:
192 QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x40
223 QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x40
193224 TESTQ $(1<<15), R14
194 JE skipNext15
195 MOVQ 15*24(AX), R9
196 LONG $0x487c4162; WORD $0x3c10; BYTE $0x09
225 JE skipNext15
226 MOVQ 15*24(AX), R9
227 LONG $0x487c4162; WORD $0x3c10; BYTE $0x09
228
197229 skipNext15:
198230 QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0xc4fbfe4945d16207; LONG $0xce92fbc1
199 JMP lloop
231 JMP lloop
232
200233 lastLoop:
201 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516231626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d3162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x516232626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d516233; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x4865516234626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d3162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD 
$0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x6235626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623662; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d516237626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d3162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x38626f487e7162c0; QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516239626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d1162cacd25; 
QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x51623a626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d51623b; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x486551623c626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d1162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x623d626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD 
$0xc4fe487551623e62; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d51623f626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d1162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x40626f487e7162c0; QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0x62fbfe4945d16207; QUAD $0x7ef162077f487ef1; QUAD $0x487ef162014f7f48; QUAD $0x7f487ef16202577f; QUAD $0x677f487ef162035f; QUAD $0x056f7f487ef16204; QUAD $0x6206777f487ef162; LONG $0x7f487ef1; WORD $0x077f
234 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516231626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d3162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x516232626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d516233; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x4865516234626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d3162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD 
$0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x6235626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623662; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d516237626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d3162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x38626f487e7162c0; QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516239626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d1162cacd25; 
QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x51623a626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d51623b; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x486551623c626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d1162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x623d626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD 
$0xc4fe487551623e62; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d51623f626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d1162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x40626f487e7162c0; QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0x62fbfe4945d16207; QUAD $0x7ef162077f487ef1; QUAD $0x487ef162014f7f48; QUAD $0x7f487ef16202577f; QUAD $0x677f487ef162035f; QUAD $0x056f7f487ef16204; QUAD $0x6206777f487ef162; LONG $0x7f487ef1; WORD $0x077f
202235 VZEROUPPER
203236 RET
237
204238 DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x000(SB)/8, $0x0405060700010203
205239 DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x008(SB)/8, $0x0c0d0e0f08090a0b
206240 DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x010(SB)/8, $0x0405060700010203
209209 for i := 0; i < 16; i++ {
210210 input := make([]byte, 64)
211211 copy(input, golden[offset+i].in)
212 server.Write(uint64(Avx512ServerUid+i), input)
212 server.Write(uint64(Avx512ServerUID+i), input)
213213 }
214214
215215 // Second block of 64 bytes
216216 for i := 0; i < 16; i++ {
217217 input := make([]byte, 64)
218218 copy(input, golden[offset+i].in[64:])
219 server.Write(uint64(Avx512ServerUid+i), input)
219 server.Write(uint64(Avx512ServerUID+i), input)
220220 }
221221
222222 wg := sync.WaitGroup{}
240240 t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[:]), hex.EncodeToString(golden[offset+i].out[:]))
241241 }
242242 wg.Done()
243 }(i, uint64(Avx512ServerUid+i), input)
243 }(i, uint64(Avx512ServerUID+i), input)
244244 }
245245
246246 wg.Wait()
3434 #include "textflag.h"
3535
// ROTATE_XS rotates the four registers X4..X7 by one position:
// X5 -> X4, X6 -> X5, X7 -> X6, and the previous X4 -> X7.
// X15 is used as the scratch register that holds the old X4 and is
// overwritten by this macro.
3636 #define ROTATE_XS \
37 MOVOU X4, X15 \
38 MOVOU X5, X4 \
39 MOVOU X6, X5 \
40 MOVOU X7, X6 \
41 MOVOU X15, X7
37 MOVOU X4, X15 \
38 MOVOU X5, X4 \
39 MOVOU X6, X5 \
40 MOVOU X7, X6 \
41 MOVOU X15, X7
4242
4343 // compute s0 four at a time and s1 two at a time
4444 // compute W[-16] + W[-7] 4 at a time
4545 #define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h) \
46 MOVL e, R13 \ /* y0 = e */
47 ROLL $18, R13 \ /* y0 = e >> (25-11) */
48 MOVL a, R14 \ /* y1 = a */
49 LONG $0x0f41e3c4; WORD $0x04c6 \ // VPALIGNR XMM0,XMM7,XMM6,0x4 /* XTMP0 = W[-7] */
50 ROLL $23, R14 \ /* y1 = a >> (22-13) */
51 XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */
52 MOVL f, R15 \ /* y2 = f */
53 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
54 XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */
55 XORL g, R15 \ /* y2 = f^g */
56 LONG $0xc4fef9c5 \ // VPADDD XMM0,XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */
57 XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) */
58 ANDL e, R15 \ /* y2 = (f^g)&e */
59 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
60 \ /* */
61 \ /* compute s0 */
62 \ /* */
63 LONG $0x0f51e3c4; WORD $0x04cc \ // VPALIGNR XMM1,XMM5,XMM4,0x4 /* XTMP1 = W[-15] */
64 XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
65 ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */
66 XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */
67 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
68 ADDL R13, R15 \ /* y2 = S1 + CH */
69 ADDL _xfer+48(FP), R15 \ /* y2 = k + w + S1 + CH */
70 MOVL a, R13 \ /* y0 = a */
71 ADDL R15, h \ /* h = h + S1 + CH + k + w */
72 \ /* ROTATE_ARGS */
73 MOVL a, R15 \ /* y2 = a */
74 LONG $0xd172e9c5; BYTE $0x07 \ // VPSRLD XMM2,XMM1,0x7 /* */
75 ORL c, R13 \ /* y0 = a|c */
76 ADDL h, d \ /* d = d + h + S1 + CH + k + w */
77 ANDL c, R15 \ /* y2 = a&c */
78 LONG $0xf172e1c5; BYTE $0x19 \ // VPSLLD XMM3,XMM1,0x19 /* */
79 ANDL b, R13 \ /* y0 = (a|c)&b */
80 ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */
81 LONG $0xdaebe1c5 \ // VPOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */
82 ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */
83 ADDL R13, h \ /* h = h + S1 + CH + k + w + S0 + MAJ */
84 \ /* ROTATE_ARGS */
85 MOVL d, R13 \ /* y0 = e */
86 MOVL h, R14 \ /* y1 = a */
87 ROLL $18, R13 \ /* y0 = e >> (25-11) */
88 XORL d, R13 \ /* y0 = e ^ (e >> (25-11)) */
89 MOVL e, R15 \ /* y2 = f */
90 ROLL $23, R14 \ /* y1 = a >> (22-13) */
91 LONG $0xd172e9c5; BYTE $0x12 \ // VPSRLD XMM2,XMM1,0x12 /* */
92 XORL h, R14 \ /* y1 = a ^ (a >> (22-13) */
93 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
94 XORL f, R15 \ /* y2 = f^g */
95 LONG $0xd172b9c5; BYTE $0x03 \ // VPSRLD XMM8,XMM1,0x3 /* XTMP4 = W[-15] >> 3 */
96 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
97 XORL d, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */
98 ANDL d, R15 \ /* y2 = (f^g)&e */
99 ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */
100 LONG $0xf172f1c5; BYTE $0x0e \ // VPSLLD XMM1,XMM1,0xe /* */
101 XORL h, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
102 XORL f, R15 \ /* y2 = CH = ((f^g)&e)^g */
103 LONG $0xd9efe1c5 \ // VPXOR XMM3,XMM3,XMM1 /* */
104 ADDL R13, R15 \ /* y2 = S1 + CH */
105 ADDL _xfer+52(FP), R15 \ /* y2 = k + w + S1 + CH */
106 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
107 LONG $0xdaefe1c5 \ // VPXOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */
108 MOVL h, R13 \ /* y0 = a */
109 ADDL R15, g \ /* h = h + S1 + CH + k + w */
110 MOVL h, R15 \ /* y2 = a */
111 LONG $0xef61c1c4; BYTE $0xc8 \ // VPXOR XMM1,XMM3,XMM8 /* XTMP1 = s0 */
112 ORL b, R13 \ /* y0 = a|c */
113 ADDL g, c \ /* d = d + h + S1 + CH + k + w */
114 ANDL b, R15 \ /* y2 = a&c */
115 \ /* */
116 \ /* compute low s1 */
117 \ /* */
118 LONG $0xd770f9c5; BYTE $0xfa \ // VPSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */
119 ANDL a, R13 \ /* y0 = (a|c)&b */
120 ADDL R14, g \ /* h = h + S1 + CH + k + w + S0 */
121 LONG $0xc1fef9c5 \ // VPADDD XMM0,XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */
122 ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */
123 ADDL R13, g \ /* h = h + S1 + CH + k + w + S0 + MAJ */
124 \ /* ROTATE_ARGS */
125 MOVL c, R13 \ /* y0 = e */
126 MOVL g, R14 \ /* y1 = a */
127 ROLL $18, R13 \ /* y0 = e >> (25-11) */
128 XORL c, R13 \ /* y0 = e ^ (e >> (25-11)) */
129 ROLL $23, R14 \ /* y1 = a >> (22-13) */
130 MOVL d, R15 \ /* y2 = f */
131 XORL g, R14 \ /* y1 = a ^ (a >> (22-13) */
132 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
133 LONG $0xd272b9c5; BYTE $0x0a \ // VPSRLD XMM8,XMM2,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */
134 XORL e, R15 \ /* y2 = f^g */
135 LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */
136 XORL c, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */
137 ANDL c, R15 \ /* y2 = (f^g)&e */
138 LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */
139 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
140 XORL g, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
141 XORL e, R15 \ /* y2 = CH = ((f^g)&e)^g */
142 ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */
143 LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */
144 ADDL R13, R15 \ /* y2 = S1 + CH */
145 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
146 ADDL _xfer+56(FP), R15 \ /* y2 = k + w + S1 + CH */
147 LONG $0xc2ef39c5 \ // VPXOR XMM8,XMM8,XMM2 /* XTMP4 = s1 {xBxA} */
148 MOVL g, R13 \ /* y0 = a */
149 ADDL R15, f \ /* h = h + S1 + CH + k + w */
150 MOVL g, R15 \ /* y2 = a */
151 LONG $0x003942c4; BYTE $0xc2 \ // VPSHUFB XMM8,XMM8,XMM10 /* XTMP4 = s1 {00BA} */
152 ORL a, R13 \ /* y0 = a|c */
153 ADDL f, b \ /* d = d + h + S1 + CH + k + w */
154 ANDL a, R15 \ /* y2 = a&c */
155 LONG $0xfe79c1c4; BYTE $0xc0 \ // VPADDD XMM0,XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */
156 ANDL h, R13 \ /* y0 = (a|c)&b */
157 ADDL R14, f \ /* h = h + S1 + CH + k + w + S0 */
158 \ /* */
159 \ /* compute high s1 */
160 \ /* */
161 LONG $0xd070f9c5; BYTE $0x50 \ // VPSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */
162 ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */
163 ADDL R13, f \ /* h = h + S1 + CH + k + w + S0 + MAJ */
164 \ /* ROTATE_ARGS */
165 MOVL b, R13 \ /* y0 = e */
166 ROLL $18, R13 \ /* y0 = e >> (25-11) */
167 MOVL f, R14 \ /* y1 = a */
168 ROLL $23, R14 \ /* y1 = a >> (22-13) */
169 XORL b, R13 \ /* y0 = e ^ (e >> (25-11)) */
170 MOVL c, R15 \ /* y2 = f */
171 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
172 LONG $0xd272a1c5; BYTE $0x0a \ // VPSRLD XMM11,XMM2,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */
173 XORL f, R14 \ /* y1 = a ^ (a >> (22-13) */
174 XORL d, R15 \ /* y2 = f^g */
175 LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */
176 XORL b, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */
177 ANDL b, R15 \ /* y2 = (f^g)&e */
178 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
179 LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */
180 XORL f, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
181 ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */
182 XORL d, R15 \ /* y2 = CH = ((f^g)&e)^g */
183 LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */
184 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
185 ADDL R13, R15 \ /* y2 = S1 + CH */
186 ADDL _xfer+60(FP), R15 \ /* y2 = k + w + S1 + CH */
187 LONG $0xdaef21c5 \ // VPXOR XMM11,XMM11,XMM2 /* XTMP5 = s1 {xDxC} */
188 MOVL f, R13 \ /* y0 = a */
189 ADDL R15, e \ /* h = h + S1 + CH + k + w */
190 MOVL f, R15 \ /* y2 = a */
191 LONG $0x002142c4; BYTE $0xdc \ // VPSHUFB XMM11,XMM11,XMM12 /* XTMP5 = s1 {DC00} */
192 ORL h, R13 \ /* y0 = a|c */
193 ADDL e, a \ /* d = d + h + S1 + CH + k + w */
194 ANDL h, R15 \ /* y2 = a&c */
195 LONG $0xe0fea1c5 \ // VPADDD XMM4,XMM11,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */
196 ANDL g, R13 \ /* y0 = (a|c)&b */
197 ADDL R14, e \ /* h = h + S1 + CH + k + w + S0 */
198 ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */
199 ADDL R13, e \ /* h = h + S1 + CH + k + w + S0 + MAJ */
200 \ /* ROTATE_ARGS */
201 ROTATE_XS
202
46 MOVL e, R13 \ // y0 = e
47 ROLL $18, R13 \ // y0 = e >> (25-11)
48 MOVL a, R14 \ // y1 = a
49 LONG $0x0f41e3c4; WORD $0x04c6 \ // VPALIGNR XMM0,XMM7,XMM6,0x4 /* XTMP0 = W[-7] */
50 ROLL $23, R14 \ // y1 = a >> (22-13)
51 XORL e, R13 \ // y0 = e ^ (e >> (25-11))
52 MOVL f, R15 \ // y2 = f
53 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
54 XORL a, R14 \ // y1 = a ^ (a >> (22-13)
55 XORL g, R15 \ // y2 = f^g
56 LONG $0xc4fef9c5 \ // VPADDD XMM0,XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */
57 XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) )
58 ANDL e, R15 \ // y2 = (f^g)&e
59 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
60 \
61 \ // compute s0
62 \
63 LONG $0x0f51e3c4; WORD $0x04cc \ // VPALIGNR XMM1,XMM5,XMM4,0x4 /* XTMP1 = W[-15] */
64 XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
65 ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25)
66 XORL g, R15 \ // y2 = CH = ((f^g)&e)^g
67 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
68 ADDL R13, R15 \ // y2 = S1 + CH
69 ADDL _xfer+48(FP), R15 \ // y2 = k + w + S1 + CH
70 MOVL a, R13 \ // y0 = a
71 ADDL R15, h \ // h = h + S1 + CH + k + w
72 \ // ROTATE_ARGS
73 MOVL a, R15 \ // y2 = a
74 LONG $0xd172e9c5; BYTE $0x07 \ // VPSRLD XMM2,XMM1,0x7 /* */
75 ORL c, R13 \ // y0 = a|c
76 ADDL h, d \ // d = d + h + S1 + CH + k + w
77 ANDL c, R15 \ // y2 = a&c
78 LONG $0xf172e1c5; BYTE $0x19 \ // VPSLLD XMM3,XMM1,0x19 /* */
79 ANDL b, R13 \ // y0 = (a|c)&b
80 ADDL R14, h \ // h = h + S1 + CH + k + w + S0
81 LONG $0xdaebe1c5 \ // VPOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */
82 ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c)
83 ADDL R13, h \ // h = h + S1 + CH + k + w + S0 + MAJ
84 \ // ROTATE_ARGS
85 MOVL d, R13 \ // y0 = e
86 MOVL h, R14 \ // y1 = a
87 ROLL $18, R13 \ // y0 = e >> (25-11)
88 XORL d, R13 \ // y0 = e ^ (e >> (25-11))
89 MOVL e, R15 \ // y2 = f
90 ROLL $23, R14 \ // y1 = a >> (22-13)
91 LONG $0xd172e9c5; BYTE $0x12 \ // VPSRLD XMM2,XMM1,0x12 /* */
92 XORL h, R14 \ // y1 = a ^ (a >> (22-13)
93 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
94 XORL f, R15 \ // y2 = f^g
95 LONG $0xd172b9c5; BYTE $0x03 \ // VPSRLD XMM8,XMM1,0x3 /* XTMP4 = W[-15] >> 3 */
96 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
97 XORL d, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
98 ANDL d, R15 \ // y2 = (f^g)&e
99 ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25)
100 LONG $0xf172f1c5; BYTE $0x0e \ // VPSLLD XMM1,XMM1,0xe /* */
101 XORL h, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
102 XORL f, R15 \ // y2 = CH = ((f^g)&e)^g
103 LONG $0xd9efe1c5 \ // VPXOR XMM3,XMM3,XMM1 /* */
104 ADDL R13, R15 \ // y2 = S1 + CH
105 ADDL _xfer+52(FP), R15 \ // y2 = k + w + S1 + CH
106 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
107 LONG $0xdaefe1c5 \ // VPXOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */
108 MOVL h, R13 \ // y0 = a
109 ADDL R15, g \ // h = h + S1 + CH + k + w
110 MOVL h, R15 \ // y2 = a
111 LONG $0xef61c1c4; BYTE $0xc8 \ // VPXOR XMM1,XMM3,XMM8 /* XTMP1 = s0 */
112 ORL b, R13 \ // y0 = a|c
113 ADDL g, c \ // d = d + h + S1 + CH + k + w
114 ANDL b, R15 \ // y2 = a&c
115 \
116 \ // compute low s1
117 \
118 LONG $0xd770f9c5; BYTE $0xfa \ // VPSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */
119 ANDL a, R13 \ // y0 = (a|c)&b
120 ADDL R14, g \ // h = h + S1 + CH + k + w + S0
121 LONG $0xc1fef9c5 \ // VPADDD XMM0,XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */
122 ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c)
123 ADDL R13, g \ // h = h + S1 + CH + k + w + S0 + MAJ
124 \ // ROTATE_ARGS
125 MOVL c, R13 \ // y0 = e
126 MOVL g, R14 \ // y1 = a
127 ROLL $18, R13 \ // y0 = e >> (25-11)
128 XORL c, R13 \ // y0 = e ^ (e >> (25-11))
129 ROLL $23, R14 \ // y1 = a >> (22-13)
130 MOVL d, R15 \ // y2 = f
131 XORL g, R14 \ // y1 = a ^ (a >> (22-13)
132 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
133 LONG $0xd272b9c5; BYTE $0x0a \ // VPSRLD XMM8,XMM2,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */
134 XORL e, R15 \ // y2 = f^g
135 LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */
136 XORL c, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
137 ANDL c, R15 \ // y2 = (f^g)&e
138 LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */
139 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
140 XORL g, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
141 XORL e, R15 \ // y2 = CH = ((f^g)&e)^g
142 ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25)
143 LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */
144 ADDL R13, R15 \ // y2 = S1 + CH
145 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
146 ADDL _xfer+56(FP), R15 \ // y2 = k + w + S1 + CH
147 LONG $0xc2ef39c5 \ // VPXOR XMM8,XMM8,XMM2 /* XTMP4 = s1 {xBxA} */
148 MOVL g, R13 \ // y0 = a
149 ADDL R15, f \ // h = h + S1 + CH + k + w
150 MOVL g, R15 \ // y2 = a
151 LONG $0x003942c4; BYTE $0xc2 \ // VPSHUFB XMM8,XMM8,XMM10 /* XTMP4 = s1 {00BA} */
152 ORL a, R13 \ // y0 = a|c
153 ADDL f, b \ // d = d + h + S1 + CH + k + w
154 ANDL a, R15 \ // y2 = a&c
155 LONG $0xfe79c1c4; BYTE $0xc0 \ // VPADDD XMM0,XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */
156 ANDL h, R13 \ // y0 = (a|c)&b
157 ADDL R14, f \ // h = h + S1 + CH + k + w + S0
158 \
159 \ // compute high s1
160 \
161 LONG $0xd070f9c5; BYTE $0x50 \ // VPSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */
162 ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c)
163 ADDL R13, f \ // h = h + S1 + CH + k + w + S0 + MAJ
164 \ // ROTATE_ARGS
165 MOVL b, R13 \ // y0 = e
166 ROLL $18, R13 \ // y0 = e >> (25-11)
167 MOVL f, R14 \ // y1 = a
168 ROLL $23, R14 \ // y1 = a >> (22-13)
169 XORL b, R13 \ // y0 = e ^ (e >> (25-11))
170 MOVL c, R15 \ // y2 = f
171 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
172 LONG $0xd272a1c5; BYTE $0x0a \ // VPSRLD XMM11,XMM2,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */
173 XORL f, R14 \ // y1 = a ^ (a >> (22-13)
174 XORL d, R15 \ // y2 = f^g
175 LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */
176 XORL b, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
177 ANDL b, R15 \ // y2 = (f^g)&e
178 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
179 LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */
180 XORL f, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
181 ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25)
182 XORL d, R15 \ // y2 = CH = ((f^g)&e)^g
183 LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */
184 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
185 ADDL R13, R15 \ // y2 = S1 + CH
186 ADDL _xfer+60(FP), R15 \ // y2 = k + w + S1 + CH
187 LONG $0xdaef21c5 \ // VPXOR XMM11,XMM11,XMM2 /* XTMP5 = s1 {xDxC} */
188 MOVL f, R13 \ // y0 = a
189 ADDL R15, e \ // h = h + S1 + CH + k + w
190 MOVL f, R15 \ // y2 = a
191 LONG $0x002142c4; BYTE $0xdc \ // VPSHUFB XMM11,XMM11,XMM12 /* XTMP5 = s1 {DC00} */
192 ORL h, R13 \ // y0 = a|c
193 ADDL e, a \ // d = d + h + S1 + CH + k + w
194 ANDL h, R15 \ // y2 = a&c
195 LONG $0xe0fea1c5 \ // VPADDD XMM4,XMM11,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */
196 ANDL g, R13 \ // y0 = (a|c)&b
197 ADDL R14, e \ // h = h + S1 + CH + k + w + S0
198 ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c)
199 ADDL R13, e \ // h = h + S1 + CH + k + w + S0 + MAJ
200 \ // ROTATE_ARGS
201 ROTATE_XS
203202 // DO_ROUND runs one SHA-256 round without message scheduling: it reads the pre-added k+w word from _xfer+offset(FP), leaves the new 'a' value in h and the new 'e' value in d, and uses R13-R15 as scratch.
204203 #define DO_ROUND(a, b, c, d, e, f, g, h, offset) \
205 MOVL e, R13 \ /* y0 = e */
206 ROLL $18, R13 \ /* y0 = e >> (25-11) */
207 MOVL a, R14 \ /* y1 = a */
208 XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */
209 ROLL $23, R14 \ /* y1 = a >> (22-13) */
210 MOVL f, R15 \ /* y2 = f */
211 XORL a, R14 \ /* y1 = a ^ (a >> (22-13)) */
212 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
213 XORL g, R15 \ /* y2 = f^g */
214 XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */
215 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
216 ANDL e, R15 \ /* y2 = (f^g)&e */
217 XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
218 ROLL $26, R13 \ /* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */
219 XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */
220 ADDL R13, R15 \ /* y2 = S1 + CH */
221 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
222 ADDL _xfer+offset(FP), R15 \ /* y2 = k + w + S1 + CH */
223 MOVL a, R13 \ /* y0 = a */
224 ADDL R15, h \ /* h = h + S1 + CH + k + w */
225 MOVL a, R15 \ /* y2 = a */
226 ORL c, R13 \ /* y0 = a|c */
227 ADDL h, d \ /* d = d + h + S1 + CH + k + w */
228 ANDL c, R15 \ /* y2 = a&c */
229 ANDL b, R13 \ /* y0 = (a|c)&b */
230 ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */
231 ORL R15, R13 \ /* y0 = MAJ = ((a|c)&b)|(a&c) */
232 ADDL R13, h /* h = h + S1 + CH + k + w + S0 + MAJ */
233
204 MOVL e, R13 \ // y0 = e
205 ROLL $18, R13 \ // y0 = e >> (25-11)
206 MOVL a, R14 \ // y1 = a
207 XORL e, R13 \ // y0 = e ^ (e >> (25-11))
208 ROLL $23, R14 \ // y1 = a >> (22-13)
209 MOVL f, R15 \ // y2 = f
210 XORL a, R14 \ // y1 = a ^ (a >> (22-13))
211 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
212 XORL g, R15 \ // y2 = f^g
213 XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
214 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
215 ANDL e, R15 \ // y2 = (f^g)&e
216 XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
217 ROLL $26, R13 \ // y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
218 XORL g, R15 \ // y2 = CH = ((f^g)&e)^g
219 ADDL R13, R15 \ // y2 = S1 + CH
220 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
221 ADDL _xfer+offset(FP), R15 \ // y2 = k + w + S1 + CH
222 MOVL a, R13 \ // y0 = a
223 ADDL R15, h \ // h = h + S1 + CH + k + w
224 MOVL a, R15 \ // y2 = a
225 ORL c, R13 \ // y0 = a|c
226 ADDL h, d \ // d = d + h + S1 + CH + k + w
227 ANDL c, R15 \ // y2 = a&c
228 ANDL b, R13 \ // y0 = (a|c)&b
229 ADDL R14, h \ // h = h + S1 + CH + k + w + S0
230 ORL R15, R13 \ // y0 = MAJ = ((a|c)&b)|(a&c)
231 ADDL R13, h // h = h + S1 + CH + k + w + S0 + MAJ
234232
235233 // func blockAvx(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64)
236234 TEXT ·blockAvx(SB), 7, $0
237235
238 MOVQ h+0(FP), SI // SI: &h
239 MOVQ message+24(FP), R8 // &message
240 MOVQ lenmessage+32(FP), R9 // length of message
241 CMPQ R9, $0
242 JEQ done_hash
243 ADDQ R8, R9
244 MOVQ R9, _inp_end+64(FP) // store end of message
245
246 // Register definition
247 // a --> eax
248 // b --> ebx
249 // c --> ecx
250 // d --> r8d
251 // e --> edx
252 // f --> r9d
253 // g --> r10d
254 // h --> r11d
255 //
256 // y0 --> r13d
257 // y1 --> r14d
258 // y2 --> r15d
259
260 MOVL (0*4)(SI), AX // a = H0
261 MOVL (1*4)(SI), BX // b = H1
262 MOVL (2*4)(SI), CX // c = H2
263 MOVL (3*4)(SI), R8 // d = H3
264 MOVL (4*4)(SI), DX // e = H4
265 MOVL (5*4)(SI), R9 // f = H5
266 MOVL (6*4)(SI), R10 // g = H6
267 MOVL (7*4)(SI), R11 // h = H7
236 MOVQ h+0(FP), SI // SI: &h
237 MOVQ message+24(FP), R8 // &message
238 MOVQ lenmessage+32(FP), R9 // length of message
239 CMPQ R9, $0
240 JEQ done_hash
241 ADDQ R8, R9
242 MOVQ R9, _inp_end+64(FP) // store end of message
243
244 // Register definition
245 // a --> eax
246 // b --> ebx
247 // c --> ecx
248 // d --> r8d
249 // e --> edx
250 // f --> r9d
251 // g --> r10d
252 // h --> r11d
253 //
254 // y0 --> r13d
255 // y1 --> r14d
256 // y2 --> r15d
257
258 MOVL (0*4)(SI), AX // a = H0
259 MOVL (1*4)(SI), BX // b = H1
260 MOVL (2*4)(SI), CX // c = H2
261 MOVL (3*4)(SI), R8 // d = H3
262 MOVL (4*4)(SI), DX // e = H4
263 MOVL (5*4)(SI), R9 // f = H5
264 MOVL (6*4)(SI), R10 // g = H6
265 MOVL (7*4)(SI), R11 // h = H7
268266
269267 MOVOU bflipMask<>(SB), X13
270 MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA
271 MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00
272
273 MOVQ message+24(FP), SI // SI: &message
268 MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA
269 MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00
270
271 MOVQ message+24(FP), SI // SI: &message
274272
275273 loop0:
276274 LEAQ constants<>(SB), BP
277275
278276 // byte swap first 16 dwords
279 MOVOU 0*16(SI), X4
280 LONG $0x0059c2c4; BYTE $0xe5 // VPSHUFB XMM4, XMM4, XMM13
281 MOVOU 1*16(SI), X5
282 LONG $0x0051c2c4; BYTE $0xed // VPSHUFB XMM5, XMM5, XMM13
283 MOVOU 2*16(SI), X6
284 LONG $0x0049c2c4; BYTE $0xf5 // VPSHUFB XMM6, XMM6, XMM13
285 MOVOU 3*16(SI), X7
286 LONG $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13
287
288 MOVQ SI, _inp+72(FP)
289 MOVD $0x3, DI
277 MOVOU 0*16(SI), X4
278 LONG $0x0059c2c4; BYTE $0xe5 // VPSHUFB XMM4, XMM4, XMM13
279 MOVOU 1*16(SI), X5
280 LONG $0x0051c2c4; BYTE $0xed // VPSHUFB XMM5, XMM5, XMM13
281 MOVOU 2*16(SI), X6
282 LONG $0x0049c2c4; BYTE $0xf5 // VPSHUFB XMM6, XMM6, XMM13
283 MOVOU 3*16(SI), X7
284 LONG $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13
285
286 MOVQ SI, _inp+72(FP)
287 MOVD $0x3, DI
290288
291289 // schedule 48 input dwords, by doing 3 rounds of 16 each
292290 loop1:
293 LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */
294 MOVOU X9, _xfer+48(FP)
295 FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
296
297 LONG $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP] /* Add 2nd constant to message */
298 MOVOU X9, _xfer+48(FP)
299 FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
300
301 LONG $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP] /* Add 3rd constant to message */
302 MOVOU X9, _xfer+48(FP)
303 FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
304
305 LONG $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP] /* Add 4th constant to message */
306 MOVOU X9, _xfer+48(FP)
307 ADDQ $64, BP
308 FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
309
310 SUBQ $1, DI
311 JNE loop1
312
313 MOVD $0x2, DI
291 LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */
292 MOVOU X9, _xfer+48(FP)
293 FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
294
295 LONG $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP] /* Add 2nd constant to message */
296 MOVOU X9, _xfer+48(FP)
297 FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
298
299 LONG $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP] /* Add 3rd constant to message */
300 MOVOU X9, _xfer+48(FP)
301 FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
302
303 LONG $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP] /* Add 4th constant to message */
304 MOVOU X9, _xfer+48(FP)
305 ADDQ $64, BP
306 FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
307
308 SUBQ $1, DI
309 JNE loop1
310
311 MOVD $0x2, DI
312
314313 loop2:
315 LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */
316 MOVOU X9, _xfer+48(FP)
317 DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48)
318 DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52)
319 DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56)
320 DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60)
321
322 LONG $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP] /* Add 2nd constant to message */
323 MOVOU X9, _xfer+48(FP)
324 ADDQ $32, BP
325 DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48)
326 DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52)
327 DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56)
328 DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60)
329
330 MOVOU X6, X4
331 MOVOU X7, X5
332
333 SUBQ $1, DI
334 JNE loop2
335
336 MOVQ h+0(FP), SI // SI: &h
337 ADDL (0*4)(SI), AX // H0 = a + H0
338 MOVL AX, (0*4)(SI)
339 ADDL (1*4)(SI), BX // H1 = b + H1
340 MOVL BX, (1*4)(SI)
341 ADDL (2*4)(SI), CX // H2 = c + H2
342 MOVL CX, (2*4)(SI)
343 ADDL (3*4)(SI), R8 // H3 = d + H3
344 MOVL R8, (3*4)(SI)
345 ADDL (4*4)(SI), DX // H4 = e + H4
346 MOVL DX, (4*4)(SI)
347 ADDL (5*4)(SI), R9 // H5 = f + H5
348 MOVL R9, (5*4)(SI)
349 ADDL (6*4)(SI), R10 // H6 = g + H6
350 MOVL R10, (6*4)(SI)
351 ADDL (7*4)(SI), R11 // H7 = h + H7
352 MOVL R11, (7*4)(SI)
353
354 MOVQ _inp+72(FP), SI
314 LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */
315 MOVOU X9, _xfer+48(FP)
316 DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48)
317 DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52)
318 DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56)
319 DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60)
320
321 LONG $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP] /* Add 2nd constant to message */
322 MOVOU X9, _xfer+48(FP)
323 ADDQ $32, BP
324 DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48)
325 DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52)
326 DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56)
327 DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60)
328
329 MOVOU X6, X4
330 MOVOU X7, X5
331
332 SUBQ $1, DI
333 JNE loop2
334
335 MOVQ h+0(FP), SI // SI: &h
336 ADDL (0*4)(SI), AX // H0 = a + H0
337 MOVL AX, (0*4)(SI)
338 ADDL (1*4)(SI), BX // H1 = b + H1
339 MOVL BX, (1*4)(SI)
340 ADDL (2*4)(SI), CX // H2 = c + H2
341 MOVL CX, (2*4)(SI)
342 ADDL (3*4)(SI), R8 // H3 = d + H3
343 MOVL R8, (3*4)(SI)
344 ADDL (4*4)(SI), DX // H4 = e + H4
345 MOVL DX, (4*4)(SI)
346 ADDL (5*4)(SI), R9 // H5 = f + H5
347 MOVL R9, (5*4)(SI)
348 ADDL (6*4)(SI), R10 // H6 = g + H6
349 MOVL R10, (6*4)(SI)
350 ADDL (7*4)(SI), R11 // H7 = h + H7
351 MOVL R11, (7*4)(SI)
352
353 MOVQ _inp+72(FP), SI
355354 ADDQ $64, SI
356355 CMPQ _inp_end+64(FP), SI
357 JNE loop0
356 JNE loop0
358357
359358 done_hash:
360 RET
359 RET
361360
362361 // Constants table
363362 DATA constants<>+0x0(SB)/8, $0x71374491428a2f98
0 //+build !noasm
1
2 package sha256
3 // blockSha folds every complete 64-byte block of message into the state h; it is implemented in assembly, and any trailing partial block is left unprocessed.
4 //go:noescape
5 func blockSha(h *[8]uint32, message []uint8)
0 //+build !noasm !appengine
1
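2 // SHA intrinsic version of SHA256
3 // NOTE(review): the +build line above ORs its space-separated terms (!noasm OR !appengine), so this file is only excluded when both tags are set; "!noasm,!appengine" (AND) was probably intended - confirm together with the tag on the matching Go declaration.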
4 // Minio Cloud Storage, (C) 2018 Minio, Inc.
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 //
10 // http://www.apache.org/licenses/LICENSE-2.0
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18
19 #include "textflag.h"
20 // K holds the 64 SHA-256 round constants as consecutive 32-bit words (256 bytes), read 16 bytes at a time by blockSha.
21 DATA K<>+0x00(SB)/4, $0x428a2f98
22 DATA K<>+0x04(SB)/4, $0x71374491
23 DATA K<>+0x08(SB)/4, $0xb5c0fbcf
24 DATA K<>+0x0c(SB)/4, $0xe9b5dba5
25 DATA K<>+0x10(SB)/4, $0x3956c25b
26 DATA K<>+0x14(SB)/4, $0x59f111f1
27 DATA K<>+0x18(SB)/4, $0x923f82a4
28 DATA K<>+0x1c(SB)/4, $0xab1c5ed5
29 DATA K<>+0x20(SB)/4, $0xd807aa98
30 DATA K<>+0x24(SB)/4, $0x12835b01
31 DATA K<>+0x28(SB)/4, $0x243185be
32 DATA K<>+0x2c(SB)/4, $0x550c7dc3
33 DATA K<>+0x30(SB)/4, $0x72be5d74
34 DATA K<>+0x34(SB)/4, $0x80deb1fe
35 DATA K<>+0x38(SB)/4, $0x9bdc06a7
36 DATA K<>+0x3c(SB)/4, $0xc19bf174
37 DATA K<>+0x40(SB)/4, $0xe49b69c1
38 DATA K<>+0x44(SB)/4, $0xefbe4786
39 DATA K<>+0x48(SB)/4, $0x0fc19dc6
40 DATA K<>+0x4c(SB)/4, $0x240ca1cc
41 DATA K<>+0x50(SB)/4, $0x2de92c6f
42 DATA K<>+0x54(SB)/4, $0x4a7484aa
43 DATA K<>+0x58(SB)/4, $0x5cb0a9dc
44 DATA K<>+0x5c(SB)/4, $0x76f988da
45 DATA K<>+0x60(SB)/4, $0x983e5152
46 DATA K<>+0x64(SB)/4, $0xa831c66d
47 DATA K<>+0x68(SB)/4, $0xb00327c8
48 DATA K<>+0x6c(SB)/4, $0xbf597fc7
49 DATA K<>+0x70(SB)/4, $0xc6e00bf3
50 DATA K<>+0x74(SB)/4, $0xd5a79147
51 DATA K<>+0x78(SB)/4, $0x06ca6351
52 DATA K<>+0x7c(SB)/4, $0x14292967
53 DATA K<>+0x80(SB)/4, $0x27b70a85
54 DATA K<>+0x84(SB)/4, $0x2e1b2138
55 DATA K<>+0x88(SB)/4, $0x4d2c6dfc
56 DATA K<>+0x8c(SB)/4, $0x53380d13
57 DATA K<>+0x90(SB)/4, $0x650a7354
58 DATA K<>+0x94(SB)/4, $0x766a0abb
59 DATA K<>+0x98(SB)/4, $0x81c2c92e
60 DATA K<>+0x9c(SB)/4, $0x92722c85
61 DATA K<>+0xa0(SB)/4, $0xa2bfe8a1
62 DATA K<>+0xa4(SB)/4, $0xa81a664b
63 DATA K<>+0xa8(SB)/4, $0xc24b8b70
64 DATA K<>+0xac(SB)/4, $0xc76c51a3
65 DATA K<>+0xb0(SB)/4, $0xd192e819
66 DATA K<>+0xb4(SB)/4, $0xd6990624
67 DATA K<>+0xb8(SB)/4, $0xf40e3585
68 DATA K<>+0xbc(SB)/4, $0x106aa070
69 DATA K<>+0xc0(SB)/4, $0x19a4c116
70 DATA K<>+0xc4(SB)/4, $0x1e376c08
71 DATA K<>+0xc8(SB)/4, $0x2748774c
72 DATA K<>+0xcc(SB)/4, $0x34b0bcb5
73 DATA K<>+0xd0(SB)/4, $0x391c0cb3
74 DATA K<>+0xd4(SB)/4, $0x4ed8aa4a
75 DATA K<>+0xd8(SB)/4, $0x5b9cca4f
76 DATA K<>+0xdc(SB)/4, $0x682e6ff3
77 DATA K<>+0xe0(SB)/4, $0x748f82ee
78 DATA K<>+0xe4(SB)/4, $0x78a5636f
79 DATA K<>+0xe8(SB)/4, $0x84c87814
80 DATA K<>+0xec(SB)/4, $0x8cc70208
81 DATA K<>+0xf0(SB)/4, $0x90befffa
82 DATA K<>+0xf4(SB)/4, $0xa4506ceb
83 DATA K<>+0xf8(SB)/4, $0xbef9a3f7
84 DATA K<>+0xfc(SB)/4, $0xc67178f2
85 GLOBL K<>(SB), RODATA|NOPTR, $256
86 // SHUF_MASK is a PSHUFB control that reverses the byte order inside each 32-bit lane; blockSha applies it to every loaded message block.
87 DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203
88 DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b
89 GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16
90
91 // Register Usage
92 // BX base address of constant table (constant)
93 // DX hash_state (constant)
94 // SI hash_data.data
95 // DI hash_data.data + hash_data.length - 64 (constant)
96 // X0 scratch
97 // X1 scratch
98 // X2 working hash state // ABEF
99 // X3 working hash state // CDGH
100 // X4 first 16 bytes of block
101 // X5 second 16 bytes of block
102 // X6 third 16 bytes of block
103 // X7 fourth 16 bytes of block
104 // X12 saved hash state // ABEF
105 // X13 saved hash state // CDGH
106 // X15 data shuffle mask (constant)
107 // blockSha(h *[8]uint32, message []uint8) runs the SHA-256 compression over every complete 64-byte block of message and writes the updated state back to h; a trailing partial block is not consumed.
108 TEXT ·blockSha(SB), NOSPLIT, $0-32
109 MOVQ h+0(FP), DX
110 MOVQ message_base+8(FP), SI
111 MOVQ message_len+16(FP), DI
112 LEAQ -64(SI)(DI*1), DI // DI = base + len - 64: last address at which a full block can start
113 MOVOU (DX), X2 // X2 = h[0..3]
114 MOVOU 16(DX), X1 // X1 = h[4..7]
115 MOVO X2, X3
116 PUNPCKLLQ X1, X2
117 PUNPCKHLQ X1, X3
118 PSHUFD $0x27, X2, X2 // X2 = ABEF (see Register Usage)
119 PSHUFD $0x27, X3, X3 // X3 = CDGH (see Register Usage)
120 MOVO SHUF_MASK<>(SB), X15
121 LEAQ K<>(SB), BX
122
123 JMP TEST
124
125 LOOP:
126 MOVO X2, X12
127 MOVO X3, X13
128
129 // load block and shuffle
130 MOVOU (SI), X4
131 MOVOU 16(SI), X5
132 MOVOU 32(SI), X6
133 MOVOU 48(SI), X7
134 PSHUFB X15, X4
135 PSHUFB X15, X5
136 PSHUFB X15, X6
137 PSHUFB X15, X7
138 // Each ROUNDxyz macro expects the round constants preloaded in X0 and performs four rounds plus message-schedule updates; the digits name the message registers (X4-X7) it uses.
139 #define ROUND456 \
140 PADDL X5, X0 \
141 LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
142 MOVO X5, X1 \
143 LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4
144 PADDL X1, X6 \
145 LONG $0xf5cd380f \ // SHA256MSG2 XMM6, XMM5
146 PSHUFD $0x4e, X0, X0 \
147 LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
148 LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5
149
150 #define ROUND567 \
151 PADDL X6, X0 \
152 LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
153 MOVO X6, X1 \
154 LONG $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4
155 PADDL X1, X7 \
156 LONG $0xfecd380f \ // SHA256MSG2 XMM7, XMM6
157 PSHUFD $0x4e, X0, X0 \
158 LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
159 LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6
160
161 #define ROUND674 \
162 PADDL X7, X0 \
163 LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
164 MOVO X7, X1 \
165 LONG $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4
166 PADDL X1, X4 \
167 LONG $0xe7cd380f \ // SHA256MSG2 XMM4, XMM7
168 PSHUFD $0x4e, X0, X0 \
169 LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
170 LONG $0xf7cc380f // SHA256MSG1 XMM6, XMM7
171
172 #define ROUND745 \
173 PADDL X4, X0 \
174 LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
175 MOVO X4, X1 \
176 LONG $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4
177 PADDL X1, X5 \
178 LONG $0xeccd380f \ // SHA256MSG2 XMM5, XMM4
179 PSHUFD $0x4e, X0, X0 \
180 LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
181 LONG $0xfccc380f // SHA256MSG1 XMM7, XMM4
182
183 // rounds 0-3
184 MOVO (BX), X0
185 PADDL X4, X0
186 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
187 PSHUFD $0x4e, X0, X0
188 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
189
190 // rounds 4-7
191 MOVO 1*16(BX), X0
192 PADDL X5, X0
193 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
194 PSHUFD $0x4e, X0, X0
195 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
196 LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5
197
198 // rounds 8-11
199 MOVO 2*16(BX), X0
200 PADDL X6, X0
201 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
202 PSHUFD $0x4e, X0, X0
203 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
204 LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6
205
206 MOVO 3*16(BX), X0; ROUND674 // rounds 12-15
207 MOVO 4*16(BX), X0; ROUND745 // rounds 16-19
208 MOVO 5*16(BX), X0; ROUND456 // rounds 20-23
209 MOVO 6*16(BX), X0; ROUND567 // rounds 24-27
210 MOVO 7*16(BX), X0; ROUND674 // rounds 28-31
211 MOVO 8*16(BX), X0; ROUND745 // rounds 32-35
212 MOVO 9*16(BX), X0; ROUND456 // rounds 36-39
213 MOVO 10*16(BX), X0; ROUND567 // rounds 40-43
214 MOVO 11*16(BX), X0; ROUND674 // rounds 44-47
215 MOVO 12*16(BX), X0; ROUND745 // rounds 48-51
216
217 // rounds 52-55
218 MOVO 13*16(BX), X0
219 PADDL X5, X0
220 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
221 MOVO X5, X1
222 LONG $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4
223 PADDL X1, X6
224 LONG $0xf5cd380f // SHA256MSG2 XMM6, XMM5
225 PSHUFD $0x4e, X0, X0
226 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
227
228 // rounds 56-59
229 MOVO 14*16(BX), X0
230 PADDL X6, X0
231 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
232 MOVO X6, X1
233 LONG $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4
234 PADDL X1, X7
235 LONG $0xfecd380f // SHA256MSG2 XMM7, XMM6
236 PSHUFD $0x4e, X0, X0
237 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
238
239 // rounds 60-63
240 MOVO 15*16(BX), X0
241 PADDL X7, X0
242 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
243 PSHUFD $0x4e, X0, X0
244 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
245
246 PADDL X12, X2 // add the state saved at the top of the loop
247 PADDL X13, X3
248
249 ADDQ $64, SI
250
251 TEST:
252 CMPQ SI, DI
253 JBE LOOP
254 // Undo the ABEF/CDGH arrangement so that h[0..3] and h[4..7] are stored in their original order.
255 PSHUFD $0x4e, X3, X0
256 LONG $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0
257 PSHUFD $0x4e, X2, X1
258 LONG $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f
259 PSHUFD $0x1b, X0, X0
260 PSHUFD $0x1b, X1, X1
261
262 MOVOU X0, (DX)
263 MOVOU X1, 16(DX)
264
265 RET
0 //+build !noasm
1
2 package sha256
3
4 import (
5 "crypto/sha256"
6 "encoding/binary"
7 "testing"
8 )
9 // sha256hash computes the SHA-256 digest of m by driving blockSha directly and doing the final padding by hand.
10 func sha256hash(m []byte) (r [32]byte) {
11 var h [8]uint32
12 // SHA-256 initial hash values.
13 h[0] = 0x6a09e667
14 h[1] = 0xbb67ae85
15 h[2] = 0x3c6ef372
16 h[3] = 0xa54ff53a
17 h[4] = 0x510e527f
18 h[5] = 0x9b05688c
19 h[6] = 0x1f83d9ab
20 h[7] = 0x5be0cd19
21 // Hash the complete 64-byte blocks of m, then keep only the leftover tail of l bytes.
22 blockSha(&h, m)
23 l0 := len(m)
24 l := l0 & (BlockSize - 1)
25 m = m[l0-l:]
26 // k becomes the final padded block: tail bytes, the 0x80 marker, zeros, then the message length.
27 var k [64]byte
28 copy(k[:], m)
29
30 k[l] = 0x80
31 // A tail of 56 or more bytes leaves no room for the 8-byte length: hash k as is, then clear bytes 0..55 for a second padding block.
32 if l >= 56 {
33 blockSha(&h, k[:])
34 binary.LittleEndian.PutUint64(k[0:8], 0)
35 binary.LittleEndian.PutUint64(k[8:16], 0)
36 binary.LittleEndian.PutUint64(k[16:24], 0)
37 binary.LittleEndian.PutUint64(k[24:32], 0)
38 binary.LittleEndian.PutUint64(k[32:40], 0)
39 binary.LittleEndian.PutUint64(k[40:48], 0)
40 binary.LittleEndian.PutUint64(k[48:56], 0)
41 }
42 binary.BigEndian.PutUint64(k[56:64], uint64(l0)<<3) // total message length in bits
43 blockSha(&h, k[:])
44 // Serialize the eight state words big-endian into the result.
45 binary.BigEndian.PutUint32(r[0:4], h[0])
46 binary.BigEndian.PutUint32(r[4:8], h[1])
47 binary.BigEndian.PutUint32(r[8:12], h[2])
48 binary.BigEndian.PutUint32(r[12:16], h[3])
49 binary.BigEndian.PutUint32(r[16:20], h[4])
50 binary.BigEndian.PutUint32(r[20:24], h[5])
51 binary.BigEndian.PutUint32(r[24:28], h[6])
52 binary.BigEndian.PutUint32(r[28:32], h[7])
53
54 return
55 }
56 // runTestSha reports whether hashfunc produces the same digest as crypto/sha256.Sum256 for a fixed test message.
57 func runTestSha(hashfunc func([]byte) [32]byte) bool {
58 var m = []byte("This is a message. This is a message. This is a message. This is a message.")
59
60 ar := hashfunc(m)
61 br := sha256.Sum256(m)
62
63 return ar == br
64 }
65 // TestSha0 checks the package's Sum256 against the standard library via runTestSha.
66 func TestSha0(t *testing.T) {
67 if !runTestSha(Sum256) {
68 t.Errorf("FAILED")
69 }
70 }
71 // TestSha1 checks the blockSha-based sha256hash; the comparison only runs when the sha, ssse3 and sse41 flags are all set, otherwise the test passes without exercising it.
72 func TestSha1(t *testing.T) {
73 if sha && ssse3 && sse41 && !runTestSha(sha256hash) {
74 t.Errorf("FAILED")
75 }
76 }
3434 #include "textflag.h"
3535 // ROTATE_XS rotates the four message registers by one position (X4<-X5, X5<-X6, X6<-X7, X7<-old X4), using X15 as scratch.
3636 #define ROTATE_XS \
37 MOVOU X4, X15 \
38 MOVOU X5, X4 \
39 MOVOU X6, X5 \
40 MOVOU X7, X6 \
41 MOVOU X15, X7
37 MOVOU X4, X15 \
38 MOVOU X5, X4 \
39 MOVOU X6, X5 \
40 MOVOU X7, X6 \
41 MOVOU X15, X7
4242
4343 // compute s0 four at a time and s1 two at a time
4444 // compute W[-16] + W[-7] 4 at a time
4545 #define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h) \
46 MOVL e, R13 \ /* y0 = e */
47 ROLL $18, R13 \ /* y0 = e >> (25-11) */
48 MOVL a, R14 \ /* y1 = a */
49 MOVOU X7, X0 \
50 LONG $0x0f3a0f66; WORD $0x04c6 \ // PALIGNR XMM0,XMM6,0x4 /* XTMP0 = W[-7] */
51 ROLL $23, R14 \ /* y1 = a >> (22-13) */
52 XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */
53 MOVL f, R15 \ /* y2 = f */
54 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
55 XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */
56 XORL g, R15 \ /* y2 = f^g */
57 LONG $0xc4fe0f66 \ // PADDD XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */
58 XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) */
59 ANDL e, R15 \ /* y2 = (f^g)&e */
60 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
61 \ /* */
62 \ /* compute s0 */
63 \ /* */
64 MOVOU X5, X1 \
65 LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1,XMM4,0x4 /* XTMP1 = W[-15] */
66 XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
67 ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */
68 XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */
69 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
70 ADDL R13, R15 \ /* y2 = S1 + CH */
71 ADDL _xfer+48(FP), R15 \ /* y2 = k + w + S1 + CH */
72 MOVL a, R13 \ /* y0 = a */
73 ADDL R15, h \ /* h = h + S1 + CH + k + w */
74 \ /* ROTATE_ARGS */
75 MOVL a, R15 \ /* y2 = a */
76 MOVOU X1, X2 \
77 LONG $0xd2720f66; BYTE $0x07 \ // PSRLD XMM2,0x7 /* */
78 ORL c, R13 \ /* y0 = a|c */
79 ADDL h, d \ /* d = d + h + S1 + CH + k + w */
80 ANDL c, R15 \ /* y2 = a&c */
81 MOVOU X1, X3 \
82 LONG $0xf3720f66; BYTE $0x19 \ // PSLLD XMM3,0x19 /* */
83 ANDL b, R13 \ /* y0 = (a|c)&b */
84 ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */
85 LONG $0xdaeb0f66 \ // POR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */
86 ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */
87 ADDL R13, h \ /* h = h + S1 + CH + k + w + S0 + MAJ */
88 \ /* ROTATE_ARGS */
89 MOVL d, R13 \ /* y0 = e */
90 MOVL h, R14 \ /* y1 = a */
91 ROLL $18, R13 \ /* y0 = e >> (25-11) */
92 XORL d, R13 \ /* y0 = e ^ (e >> (25-11)) */
93 MOVL e, R15 \ /* y2 = f */
94 ROLL $23, R14 \ /* y1 = a >> (22-13) */
95 MOVOU X1, X2 \
96 LONG $0xd2720f66; BYTE $0x12 \ // PSRLD XMM2,0x12 /* */
97 XORL h, R14 \ /* y1 = a ^ (a >> (22-13) */
98 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
99 XORL f, R15 \ /* y2 = f^g */
100 MOVOU X1, X8 \
101 LONG $0x720f4166; WORD $0x03d0 \ // PSRLD XMM8,0x3 /* XTMP4 = W[-15] >> 3 */
102 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
103 XORL d, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */
104 ANDL d, R15 \ /* y2 = (f^g)&e */
105 ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */
106 LONG $0xf1720f66; BYTE $0x0e \ // PSLLD XMM1,0xe /* */
107 XORL h, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
108 XORL f, R15 \ /* y2 = CH = ((f^g)&e)^g */
109 LONG $0xd9ef0f66 \ // PXOR XMM3,XMM1 /* */
110 ADDL R13, R15 \ /* y2 = S1 + CH */
111 ADDL _xfer+52(FP), R15 \ /* y2 = k + w + S1 + CH */
112 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
113 LONG $0xdaef0f66 \ // PXOR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */
114 MOVL h, R13 \ /* y0 = a */
115 ADDL R15, g \ /* h = h + S1 + CH + k + w */
116 MOVL h, R15 \ /* y2 = a */
117 MOVOU X3, X1 \
118 LONG $0xef0f4166; BYTE $0xc8 \ // PXOR XMM1,XMM8 /* XTMP1 = s0 */
119 ORL b, R13 \ /* y0 = a|c */
120 ADDL g, c \ /* d = d + h + S1 + CH + k + w */
121 ANDL b, R15 \ /* y2 = a&c */
122 \ /* */
123 \ /* compute low s1 */
124 \ /* */
125 LONG $0xd7700f66; BYTE $0xfa \ // PSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */
126 ANDL a, R13 \ /* y0 = (a|c)&b */
127 ADDL R14, g \ /* h = h + S1 + CH + k + w + S0 */
128 LONG $0xc1fe0f66 \ // PADDD XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */
129 ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */
130 ADDL R13, g \ /* h = h + S1 + CH + k + w + S0 + MAJ */
131 \ /* ROTATE_ARGS */
132 MOVL c, R13 \ /* y0 = e */
133 MOVL g, R14 \ /* y1 = a */
134 ROLL $18, R13 \ /* y0 = e >> (25-11) */
135 XORL c, R13 \ /* y0 = e ^ (e >> (25-11)) */
136 ROLL $23, R14 \ /* y1 = a >> (22-13) */
137 MOVL d, R15 \ /* y2 = f */
138 XORL g, R14 \ /* y1 = a ^ (a >> (22-13) */
139 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
140 MOVOU X2, X8 \
141 LONG $0x720f4166; WORD $0x0ad0 \ // PSRLD XMM8,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */
142 XORL e, R15 \ /* y2 = f^g */
143 MOVOU X2, X3 \
144 LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */
145 XORL c, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */
146 ANDL c, R15 \ /* y2 = (f^g)&e */
147 LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */
148 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
149 XORL g, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
150 XORL e, R15 \ /* y2 = CH = ((f^g)&e)^g */
151 ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */
152 LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */
153 ADDL R13, R15 \ /* y2 = S1 + CH */
154 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
155 ADDL _xfer+56(FP), R15 \ /* y2 = k + w + S1 + CH */
156 LONG $0xef0f4466; BYTE $0xc2 \ // PXOR XMM8,XMM2 /* XTMP4 = s1 {xBxA} */
157 MOVL g, R13 \ /* y0 = a */
158 ADDL R15, f \ /* h = h + S1 + CH + k + w */
159 MOVL g, R15 \ /* y2 = a */
160 LONG $0x380f4566; WORD $0xc200 \ // PSHUFB XMM8,XMM10 /* XTMP4 = s1 {00BA} */
161 ORL a, R13 \ /* y0 = a|c */
162 ADDL f, b \ /* d = d + h + S1 + CH + k + w */
163 ANDL a, R15 \ /* y2 = a&c */
164 LONG $0xfe0f4166; BYTE $0xc0 \ // PADDD XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */
165 ANDL h, R13 \ /* y0 = (a|c)&b */
166 ADDL R14, f \ /* h = h + S1 + CH + k + w + S0 */
167 \ /* */
168 \ /* compute high s1 */
169 \ /* */
170 LONG $0xd0700f66; BYTE $0x50 \ // PSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */
171 ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */
172 ADDL R13, f \ /* h = h + S1 + CH + k + w + S0 + MAJ */
173 \ /* ROTATE_ARGS */
174 MOVL b, R13 \ /* y0 = e */
175 ROLL $18, R13 \ /* y0 = e >> (25-11) */
176 MOVL f, R14 \ /* y1 = a */
177 ROLL $23, R14 \ /* y1 = a >> (22-13) */
178 XORL b, R13 \ /* y0 = e ^ (e >> (25-11)) */
179 MOVL c, R15 \ /* y2 = f */
180 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
181 MOVOU X2, X11 \
182 LONG $0x720f4166; WORD $0x0ad3 \ // PSRLD XMM11,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */
183 XORL f, R14 \ /* y1 = a ^ (a >> (22-13) */
184 XORL d, R15 \ /* y2 = f^g */
185 MOVOU X2, X3 \
186 LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */
187 XORL b, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */
188 ANDL b, R15 \ /* y2 = (f^g)&e */
189 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
190 LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */
191 XORL f, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
192 ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */
193 XORL d, R15 \ /* y2 = CH = ((f^g)&e)^g */
194 LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */
195 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
196 ADDL R13, R15 \ /* y2 = S1 + CH */
197 ADDL _xfer+60(FP), R15 \ /* y2 = k + w + S1 + CH */
198 LONG $0xef0f4466; BYTE $0xda \ // PXOR XMM11,XMM2 /* XTMP5 = s1 {xDxC} */
199 MOVL f, R13 \ /* y0 = a */
200 ADDL R15, e \ /* h = h + S1 + CH + k + w */
201 MOVL f, R15 \ /* y2 = a */
202 LONG $0x380f4566; WORD $0xdc00 \ // PSHUFB XMM11,XMM12 /* XTMP5 = s1 {DC00} */
203 ORL h, R13 \ /* y0 = a|c */
204 ADDL e, a \ /* d = d + h + S1 + CH + k + w */
205 ANDL h, R15 \ /* y2 = a&c */
206 MOVOU X11, X4 \
207 LONG $0xe0fe0f66 \ // PADDD XMM4,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */
208 ANDL g, R13 \ /* y0 = (a|c)&b */
209 ADDL R14, e \ /* h = h + S1 + CH + k + w + S0 */
210 ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */
211 ADDL R13, e \ /* h = h + S1 + CH + k + w + S0 + MAJ */
212 \ /* ROTATE_ARGS */
213 ROTATE_XS
214
46 MOVL e, R13 \ // y0 = e
47 ROLL $18, R13 \ // y0 = e >> (25-11)
48 MOVL a, R14 \ // y1 = a
49 MOVOU X7, X0 \
50 LONG $0x0f3a0f66; WORD $0x04c6 \ // PALIGNR XMM0,XMM6,0x4 /* XTMP0 = W[-7] */
51 ROLL $23, R14 \ // y1 = a >> (22-13)
52 XORL e, R13 \ // y0 = e ^ (e >> (25-11))
53 MOVL f, R15 \ // y2 = f
54 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
55 XORL a, R14 \ // y1 = a ^ (a >> (22-13)
56 XORL g, R15 \ // y2 = f^g
57 LONG $0xc4fe0f66 \ // PADDD XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */
58 XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) )
59 ANDL e, R15 \ // y2 = (f^g)&e
60 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
61 \
62 \ // compute s0
63 \
64 MOVOU X5, X1 \
65 LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1,XMM4,0x4 /* XTMP1 = W[-15] */
66 XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
67 ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25)
68 XORL g, R15 \ // y2 = CH = ((f^g)&e)^g
69 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
70 ADDL R13, R15 \ // y2 = S1 + CH
71 ADDL _xfer+48(FP), R15 \ // y2 = k + w + S1 + CH
72 MOVL a, R13 \ // y0 = a
73 ADDL R15, h \ // h = h + S1 + CH + k + w
74 \ // ROTATE_ARGS
75 MOVL a, R15 \ // y2 = a
76 MOVOU X1, X2 \
77 LONG $0xd2720f66; BYTE $0x07 \ // PSRLD XMM2,0x7 /* */
78 ORL c, R13 \ // y0 = a|c
79 ADDL h, d \ // d = d + h + S1 + CH + k + w
80 ANDL c, R15 \ // y2 = a&c
81 MOVOU X1, X3 \
82 LONG $0xf3720f66; BYTE $0x19 \ // PSLLD XMM3,0x19 /* */
83 ANDL b, R13 \ // y0 = (a|c)&b
84 ADDL R14, h \ // h = h + S1 + CH + k + w + S0
85 LONG $0xdaeb0f66 \ // POR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */
86 ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c)
87 ADDL R13, h \ // h = h + S1 + CH + k + w + S0 + MAJ
88 \ // ROTATE_ARGS
89 MOVL d, R13 \ // y0 = e
90 MOVL h, R14 \ // y1 = a
91 ROLL $18, R13 \ // y0 = e >> (25-11)
92 XORL d, R13 \ // y0 = e ^ (e >> (25-11))
93 MOVL e, R15 \ // y2 = f
94 ROLL $23, R14 \ // y1 = a >> (22-13)
95 MOVOU X1, X2 \
96 LONG $0xd2720f66; BYTE $0x12 \ // PSRLD XMM2,0x12 /* */
97 XORL h, R14 \ // y1 = a ^ (a >> (22-13)
98 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
99 XORL f, R15 \ // y2 = f^g
100 MOVOU X1, X8 \
101 LONG $0x720f4166; WORD $0x03d0 \ // PSRLD XMM8,0x3 /* XTMP4 = W[-15] >> 3 */
102 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
103 XORL d, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
104 ANDL d, R15 \ // y2 = (f^g)&e
105 ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25)
106 LONG $0xf1720f66; BYTE $0x0e \ // PSLLD XMM1,0xe /* */
107 XORL h, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
108 XORL f, R15 \ // y2 = CH = ((f^g)&e)^g
109 LONG $0xd9ef0f66 \ // PXOR XMM3,XMM1 /* */
110 ADDL R13, R15 \ // y2 = S1 + CH
111 ADDL _xfer+52(FP), R15 \ // y2 = k + w + S1 + CH
112 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
113 LONG $0xdaef0f66 \ // PXOR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */
114 MOVL h, R13 \ // y0 = a
115 ADDL R15, g \ // h = h + S1 + CH + k + w
116 MOVL h, R15 \ // y2 = a
117 MOVOU X3, X1 \
118 LONG $0xef0f4166; BYTE $0xc8 \ // PXOR XMM1,XMM8 /* XTMP1 = s0 */
119 ORL b, R13 \ // y0 = a|c
120 ADDL g, c \ // d = d + h + S1 + CH + k + w
121 ANDL b, R15 \ // y2 = a&c
122 \
123 \ // compute low s1
124 \
125 LONG $0xd7700f66; BYTE $0xfa \ // PSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */
126 ANDL a, R13 \ // y0 = (a|c)&b
127 ADDL R14, g \ // h = h + S1 + CH + k + w + S0
128 LONG $0xc1fe0f66 \ // PADDD XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */
129 ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c)
130 ADDL R13, g \ // h = h + S1 + CH + k + w + S0 + MAJ
131 \ // ROTATE_ARGS
132 MOVL c, R13 \ // y0 = e
133 MOVL g, R14 \ // y1 = a
134 ROLL $18, R13 \ // y0 = e >> (25-11)
135 XORL c, R13 \ // y0 = e ^ (e >> (25-11))
136 ROLL $23, R14 \ // y1 = a >> (22-13)
137 MOVL d, R15 \ // y2 = f
138 XORL g, R14 \ // y1 = a ^ (a >> (22-13)
139 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
140 MOVOU X2, X8 \
141 LONG $0x720f4166; WORD $0x0ad0 \ // PSRLD XMM8,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */
142 XORL e, R15 \ // y2 = f^g
143 MOVOU X2, X3 \
144 LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */
145 XORL c, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
146 ANDL c, R15 \ // y2 = (f^g)&e
147 LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */
148 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
149 XORL g, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
150 XORL e, R15 \ // y2 = CH = ((f^g)&e)^g
151 ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25)
152 LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */
153 ADDL R13, R15 \ // y2 = S1 + CH
154 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
155 ADDL _xfer+56(FP), R15 \ // y2 = k + w + S1 + CH
156 LONG $0xef0f4466; BYTE $0xc2 \ // PXOR XMM8,XMM2 /* XTMP4 = s1 {xBxA} */
157 MOVL g, R13 \ // y0 = a
158 ADDL R15, f \ // h = h + S1 + CH + k + w
159 MOVL g, R15 \ // y2 = a
160 LONG $0x380f4566; WORD $0xc200 \ // PSHUFB XMM8,XMM10 /* XTMP4 = s1 {00BA} */
161 ORL a, R13 \ // y0 = a|c
162 ADDL f, b \ // d = d + h + S1 + CH + k + w
163 ANDL a, R15 \ // y2 = a&c
164 LONG $0xfe0f4166; BYTE $0xc0 \ // PADDD XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */
165 ANDL h, R13 \ // y0 = (a|c)&b
166 ADDL R14, f \ // h = h + S1 + CH + k + w + S0
167 \
168 \ // compute high s1
169 \
170 LONG $0xd0700f66; BYTE $0x50 \ // PSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */
171 ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c)
172 ADDL R13, f \ // h = h + S1 + CH + k + w + S0 + MAJ
173 \ // ROTATE_ARGS
174 MOVL b, R13 \ // y0 = e
175 ROLL $18, R13 \ // y0 = e >> (25-11)
176 MOVL f, R14 \ // y1 = a
177 ROLL $23, R14 \ // y1 = a >> (22-13)
178 XORL b, R13 \ // y0 = e ^ (e >> (25-11))
179 MOVL c, R15 \ // y2 = f
180 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
181 MOVOU X2, X11 \
182 LONG $0x720f4166; WORD $0x0ad3 \ // PSRLD XMM11,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */
183 XORL f, R14 \ // y1 = a ^ (a >> (22-13)
184 XORL d, R15 \ // y2 = f^g
185 MOVOU X2, X3 \
186 LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */
187 XORL b, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
188 ANDL b, R15 \ // y2 = (f^g)&e
189 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
190 LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */
191 XORL f, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
192 ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25)
193 XORL d, R15 \ // y2 = CH = ((f^g)&e)^g
194 LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */
195 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
196 ADDL R13, R15 \ // y2 = S1 + CH
197 ADDL _xfer+60(FP), R15 \ // y2 = k + w + S1 + CH
198 LONG $0xef0f4466; BYTE $0xda \ // PXOR XMM11,XMM2 /* XTMP5 = s1 {xDxC} */
199 MOVL f, R13 \ // y0 = a
200 ADDL R15, e \ // h = h + S1 + CH + k + w
201 MOVL f, R15 \ // y2 = a
202 LONG $0x380f4566; WORD $0xdc00 \ // PSHUFB XMM11,XMM12 /* XTMP5 = s1 {DC00} */
203 ORL h, R13 \ // y0 = a|c
204 ADDL e, a \ // d = d + h + S1 + CH + k + w
205 ANDL h, R15 \ // y2 = a&c
206 MOVOU X11, X4 \
207 LONG $0xe0fe0f66 \ // PADDD XMM4,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */
208 ANDL g, R13 \ // y0 = (a|c)&b
209 ADDL R14, e \ // h = h + S1 + CH + k + w + S0
210 ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c)
211 ADDL R13, e \ // h = h + S1 + CH + k + w + S0 + MAJ
212 \ // ROTATE_ARGS
213 ROTATE_XS
215214
// DO_ROUND performs one round on the working variables a..h using only scalar
// instructions: it reads the precomputed k+w word from _xfer+offset(FP), uses
// R13/R14/R15 as scratch (y0/y1/y2) and updates d and h.
// In the comments below "x >> n" denotes a 32-bit rotate right by n, which is
// implemented as ROLL by 32-n.
216215 #define DO_ROUND(a, b, c, d, e, f, g, h, offset) \
217 MOVL e, R13 \ /* y0 = e */
218 ROLL $18, R13 \ /* y0 = e >> (25-11) */
219 MOVL a, R14 \ /* y1 = a */
220 XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */
221 ROLL $23, R14 \ /* y1 = a >> (22-13) */
222 MOVL f, R15 \ /* y2 = f */
223 XORL a, R14 \ /* y1 = a ^ (a >> (22-13)) */
224 ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */
225 XORL g, R15 \ /* y2 = f^g */
226 XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */
227 ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */
228 ANDL e, R15 \ /* y2 = (f^g)&e */
229 XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */
230 ROLL $26, R13 \ /* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */
231 XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */
232 ADDL R13, R15 \ /* y2 = S1 + CH */
233 ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */
234 ADDL _xfer+offset(FP), R15 \ /* y2 = k + w + S1 + CH */
235 MOVL a, R13 \ /* y0 = a */
236 ADDL R15, h \ /* h = h + S1 + CH + k + w */
237 MOVL a, R15 \ /* y2 = a */
238 ORL c, R13 \ /* y0 = a|c */
239 ADDL h, d \ /* d = d + h + S1 + CH + k + w */
240 ANDL c, R15 \ /* y2 = a&c */
241 ANDL b, R13 \ /* y0 = (a|c)&b */
242 ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */
243 ORL R15, R13 \ /* y0 = MAJ = ((a|c)&b)|(a&c) */
244 ADDL R13, h /* h = h + S1 + CH + k + w + S0 + MAJ */
245
216 MOVL e, R13 \ // y0 = e
217 ROLL $18, R13 \ // y0 = e >> (25-11)
218 MOVL a, R14 \ // y1 = a
219 XORL e, R13 \ // y0 = e ^ (e >> (25-11))
220 ROLL $23, R14 \ // y1 = a >> (22-13)
221 MOVL f, R15 \ // y2 = f
222 XORL a, R14 \ // y1 = a ^ (a >> (22-13))
223 ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6))
224 XORL g, R15 \ // y2 = f^g
225 XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
226 ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2))
227 ANDL e, R15 \ // y2 = (f^g)&e
228 XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
229 ROLL $26, R13 \ // y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
230 XORL g, R15 \ // y2 = CH = ((f^g)&e)^g
231 ADDL R13, R15 \ // y2 = S1 + CH
232 ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
233 ADDL _xfer+offset(FP), R15 \ // y2 = k + w + S1 + CH
234 MOVL a, R13 \ // y0 = a
235 ADDL R15, h \ // h = h + S1 + CH + k + w
236 MOVL a, R15 \ // y2 = a
237 ORL c, R13 \ // y0 = a|c
238 ADDL h, d \ // d = d + h + S1 + CH + k + w
239 ANDL c, R15 \ // y2 = a&c
240 ANDL b, R13 \ // y0 = (a|c)&b
241 ADDL R14, h \ // h = h + S1 + CH + k + w + S0
242 ORL R15, R13 \ // y0 = MAJ = ((a|c)&b)|(a&c)
243 ADDL R13, h // h = h + S1 + CH + k + w + S0 + MAJ
246244
247245 // func blockSsse(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64)
//
// blockSsse updates the eight 32-bit state words at h in place, consuming
// message 64 bytes per loop0 iteration. The frame size is $0, so the reserved
// argument slots serve as scratch space:
//   _xfer+48(FP)    16 bytes (reserved0/reserved1): k+w words for four rounds
//   _inp_end+64(FP) reserved2: address one past the end of message
//   _inp+72(FP)     reserved3: address of the block being processed
248246 TEXT ·blockSsse(SB), 7, $0
249247
250 MOVQ h+0(FP), SI // SI: &h
251 MOVQ message+24(FP), R8 // R8: &message
252 MOVQ lenmessage+32(FP), R9 // R9: length of message
253 CMPQ R9, $0
254 JEQ done_hash // empty message: nothing to do
255 ADDQ R8, R9 // R9: &message + length
256 MOVQ R9, _inp_end+64(FP) // store end of message
257
258 // Register definition
259 // a --> eax
260 // b --> ebx
261 // c --> ecx
262 // d --> r8d
263 // e --> edx
264 // f --> r9d
265 // g --> r10d
266 // h --> r11d
267 //
268 // y0 --> r13d
269 // y1 --> r14d
270 // y2 --> r15d
271
272 MOVL (0*4)(SI), AX // a = H0
273 MOVL (1*4)(SI), BX // b = H1
274 MOVL (2*4)(SI), CX // c = H2
275 MOVL (3*4)(SI), R8 // d = H3
276 MOVL (4*4)(SI), DX // e = H4
277 MOVL (5*4)(SI), R9 // f = H5
278 MOVL (6*4)(SI), R10 // g = H6
279 MOVL (7*4)(SI), R11 // h = H7
248 MOVQ h+0(FP), SI // SI: &h
249 MOVQ message+24(FP), R8 // R8: &message
250 MOVQ lenmessage+32(FP), R9 // R9: length of message
251 CMPQ R9, $0
252 JEQ done_hash // empty message: nothing to do
253 ADDQ R8, R9 // R9: &message + length
254 MOVQ R9, _inp_end+64(FP) // store end of message
255
256 // Register definition
257 // a --> eax
258 // b --> ebx
259 // c --> ecx
260 // d --> r8d
261 // e --> edx
262 // f --> r9d
263 // g --> r10d
264 // h --> r11d
265 //
266 // y0 --> r13d
267 // y1 --> r14d
268 // y2 --> r15d
269
270 MOVL (0*4)(SI), AX // a = H0
271 MOVL (1*4)(SI), BX // b = H1
272 MOVL (2*4)(SI), CX // c = H2
273 MOVL (3*4)(SI), R8 // d = H3
274 MOVL (4*4)(SI), DX // e = H4
275 MOVL (5*4)(SI), R9 // f = H5
276 MOVL (6*4)(SI), R10 // g = H6
277 MOVL (7*4)(SI), R11 // h = H7
280278
281279 MOVOU bflipMask<>(SB), X13 // X13: shuffle mask used for the byte swap in loop0
282 MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA
283 MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00
284
285 MOVQ message+24(FP), SI // SI: &message
280 MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA
281 MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00
282
283 MOVQ message+24(FP), SI // SI: &message
286284
287285 loop0:
288286 LEAQ constants<>(SB), BP // BP: start of the constants table
289287
290288 // byte swap first 16 dwords
291 MOVOU 0*16(SI), X4
292 LONG $0x380f4166; WORD $0xe500 // PSHUFB XMM4, XMM13
293 MOVOU 1*16(SI), X5
294 LONG $0x380f4166; WORD $0xed00 // PSHUFB XMM5, XMM13
295 MOVOU 2*16(SI), X6
296 LONG $0x380f4166; WORD $0xf500 // PSHUFB XMM6, XMM13
297 MOVOU 3*16(SI), X7
298 LONG $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13
299
300 MOVQ SI, _inp+72(FP) // save the block pointer; SI is reused for &h after the rounds
301 MOVD $0x3, DI // DI: loop1 counter (3 iterations)
302
303 // Align
304 // nop WORD PTR [rax+rax*1+0x0]
289 MOVOU 0*16(SI), X4
290 LONG $0x380f4166; WORD $0xe500 // PSHUFB XMM4, XMM13
291 MOVOU 1*16(SI), X5
292 LONG $0x380f4166; WORD $0xed00 // PSHUFB XMM5, XMM13
293 MOVOU 2*16(SI), X6
294 LONG $0x380f4166; WORD $0xf500 // PSHUFB XMM6, XMM13
295 MOVOU 3*16(SI), X7
296 LONG $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13
297
298 MOVQ SI, _inp+72(FP) // save the block pointer; SI is reused for &h after the rounds
299 MOVD $0x3, DI // DI: loop1 counter (3 iterations)
300
301 // Align
302 // nop WORD PTR [rax+rax*1+0x0]
305303
306304 // schedule 48 input dwords, by doing 3 iterations of 16 rounds each
307305 loop1: // X4 is read every time: FOUR_ROUNDS_AND_SCHED ends with ROTATE_XS (presumably rotating X4..X7; defined outside this view)
308 MOVOU X4, X9
309 LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */
310 MOVOU X9, _xfer+48(FP) // k+w for the next four rounds
311 FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
312
313 MOVOU X4, X9
314 LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */
315 MOVOU X9, _xfer+48(FP)
316 FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
317
318 MOVOU X4, X9
319 LONG $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP] /* Add 3rd constant to message */
320 MOVOU X9, _xfer+48(FP)
321 FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
322
323 MOVOU X4, X9
324 LONG $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP] /* Add 4th constant to message */
325 MOVOU X9, _xfer+48(FP)
326 ADDQ $64, BP // advance to the next 64 bytes of constants
327 FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
328
329 SUBQ $1, DI
330 JNE loop1
331
332 MOVD $0x2, DI // DI: loop2 counter (2 iterations)
306 MOVOU X4, X9
307 LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */
308 MOVOU X9, _xfer+48(FP) // k+w for the next four rounds
309 FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
310
311 MOVOU X4, X9
312 LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */
313 MOVOU X9, _xfer+48(FP)
314 FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
315
316 MOVOU X4, X9
317 LONG $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP] /* Add 3rd constant to message */
318 MOVOU X9, _xfer+48(FP)
319 FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
320
321 MOVOU X4, X9
322 LONG $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP] /* Add 4th constant to message */
323 MOVOU X9, _xfer+48(FP)
324 ADDQ $64, BP // advance to the next 64 bytes of constants
325 FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
326
327 SUBQ $1, DI
328 JNE loop1
329
330 MOVD $0x2, DI // DI: loop2 counter (2 iterations)
331
333332 loop2: // remaining 16 rounds via DO_ROUND: 2 iterations of 8 rounds each
334 MOVOU X4, X9
335 LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */
336 MOVOU X9, _xfer+48(FP) // k+w for the next four rounds
337 DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48)
338 DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52)
339 DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56)
340 DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60)
341
342 MOVOU X5, X9
343 LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */
344 MOVOU X9, _xfer+48(FP)
345 ADDQ $32, BP // advance to the next 32 bytes of constants
346 DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48)
347 DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52)
348 DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56)
349 DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60)
350
351 MOVOU X6, X4 // the second iteration reads X4/X5, so move X6/X7 down
352 MOVOU X7, X5
353
354 SUBQ $1, DI
355 JNE loop2
356
357 MOVQ h+0(FP), SI // SI: &h
358 ADDL (0*4)(SI), AX // H0 = a + H0
359 MOVL AX, (0*4)(SI)
360 ADDL (1*4)(SI), BX // H1 = b + H1
361 MOVL BX, (1*4)(SI)
362 ADDL (2*4)(SI), CX // H2 = c + H2
363 MOVL CX, (2*4)(SI)
364 ADDL (3*4)(SI), R8 // H3 = d + H3
365 MOVL R8, (3*4)(SI)
366 ADDL (4*4)(SI), DX // H4 = e + H4
367 MOVL DX, (4*4)(SI)
368 ADDL (5*4)(SI), R9 // H5 = f + H5
369 MOVL R9, (5*4)(SI)
370 ADDL (6*4)(SI), R10 // H6 = g + H6
371 MOVL R10, (6*4)(SI)
372 ADDL (7*4)(SI), R11 // H7 = h + H7
373 MOVL R11, (7*4)(SI)
374
375 MOVQ _inp+72(FP), SI // SI: start of the block just processed
333 MOVOU X4, X9
334 LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */
335 MOVOU X9, _xfer+48(FP) // k+w for the next four rounds
336 DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48)
337 DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52)
338 DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56)
339 DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60)
340
341 MOVOU X5, X9
342 LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */
343 MOVOU X9, _xfer+48(FP)
344 ADDQ $32, BP // advance to the next 32 bytes of constants
345 DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48)
346 DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52)
347 DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56)
348 DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60)
349
350 MOVOU X6, X4 // the second iteration reads X4/X5, so move X6/X7 down
351 MOVOU X7, X5
352
353 SUBQ $1, DI
354 JNE loop2
355
356 MOVQ h+0(FP), SI // SI: &h
357 ADDL (0*4)(SI), AX // H0 = a + H0
358 MOVL AX, (0*4)(SI)
359 ADDL (1*4)(SI), BX // H1 = b + H1
360 MOVL BX, (1*4)(SI)
361 ADDL (2*4)(SI), CX // H2 = c + H2
362 MOVL CX, (2*4)(SI)
363 ADDL (3*4)(SI), R8 // H3 = d + H3
364 MOVL R8, (3*4)(SI)
365 ADDL (4*4)(SI), DX // H4 = e + H4
366 MOVL DX, (4*4)(SI)
367 ADDL (5*4)(SI), R9 // H5 = f + H5
368 MOVL R9, (5*4)(SI)
369 ADDL (6*4)(SI), R10 // H6 = g + H6
370 MOVL R10, (6*4)(SI)
371 ADDL (7*4)(SI), R11 // H7 = h + H7
372 MOVL R11, (7*4)(SI)
373
374 MOVQ _inp+72(FP), SI // SI: start of the block just processed
376375 ADDQ $64, SI // advance to the next 64-byte block
377376 CMPQ _inp_end+64(FP), SI // NOTE(review): exit is an equality test, so the length is presumably a multiple of 64 - confirm with callers
378 JNE loop0
377 JNE loop0
379378
380379 done_hash:
381 RET
380 RET
382381
383382 // Constants table
384383 DATA constants<>+0x0(SB)/8, $0x71374491428a2f98
2121 func blockAvx2Go(dig *digest, p []byte) {}
2222 func blockAvxGo(dig *digest, p []byte) {}
2323 func blockSsseGo(dig *digest, p []byte) {}
24 func blockShaGo(dig *digest, p []byte) {}
4545
4646 dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]
4747 }
48
// blockShaGo forwards the digest state (&dig.h) and the input p to blockSha.
49 func blockShaGo(dig *digest, p []byte) {
50
51 blockSha(&dig.h, p)
52 }
2020 func blockAvx2Go(dig *digest, p []byte) {}
2121 func blockAvxGo(dig *digest, p []byte) {}
2222 func blockSsseGo(dig *digest, p []byte) {}
23 func blockShaGo(dig *digest, p []byte) {}
2324 func blockArmGo(dig *digest, p []byte) {}
2020 func blockAvx2Go(dig *digest, p []byte) {}
2121 func blockAvxGo(dig *digest, p []byte) {}
2222 func blockSsseGo(dig *digest, p []byte) {}
23 func blockShaGo(dig *digest, p []byte) {}
2324
2425 //go:noescape
2526 func blockArm(h []uint32, message []uint8)
153153 complete:
154154 RET
155155
156
157156 // Constants table
158157 DATA ·constants+0x0(SB)/8, $0x71374491428a2f98
159158 DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf
1919 func blockAvx2Go(dig *digest, p []byte) {}
2020 func blockAvxGo(dig *digest, p []byte) {}
2121 func blockSsseGo(dig *digest, p []byte) {}
22 func blockShaGo(dig *digest, p []byte) {}
2223 func blockArmGo(dig *digest, p []byte) {}