Support SHA* intrinsics on Intel CPU (#37)
* Support SHA* intrinsics on Intel CPU
- optimise: select block function at init
- added dedicated padding function, optimised endian conversion
- add assembly for Intel SHA extensions
- update benchmarks
- streamline checksum function
- cleanup of sha assembly code
* Cleanup code to be idiomatic Go
Harshavardhana authored 5 years ago
Frank Wessels committed 5 years ago
0 | *.test⏎ |
15 | 15 | package sha256 |
16 | 16 | |
17 | 17 | // True when SIMD instructions are available. |
18 | var avx512 = haveAVX512() | |
19 | var avx2 = haveAVX2() | |
20 | var avx = haveAVX() | |
21 | var ssse3 = haveSSSE3() | |
18 | var avx512 bool | |
19 | var avx2 bool | |
20 | var avx bool | |
21 | var sse bool | |
22 | var sse2 bool | |
23 | var sse3 bool | |
24 | var ssse3 bool | |
25 | var sse41 bool | |
26 | var sse42 bool | |
27 | var popcnt bool | |
28 | var sha bool | |
22 | 29 | var armSha = haveArmSha() |
23 | 30 | |
24 | // haveAVX returns true when there is AVX support | |
25 | func haveAVX() bool { | |
26 | _, _, c, _ := cpuid(1) | |
31 | func init() { | |
32 | var _xsave bool | |
33 | var _osxsave bool | |
34 | var _avx bool | |
35 | var _avx2 bool | |
36 | var _avx512f bool | |
37 | var _avx512dq bool | |
38 | // var _avx512pf bool | |
39 | // var _avx512er bool | |
40 | // var _avx512cd bool | |
41 | var _avx512bw bool | |
42 | var _avx512vl bool | |
43 | var _sseState bool | |
44 | var _avxState bool | |
45 | var _opmaskState bool | |
46 | var _zmmHI256State bool | |
47 | var _hi16ZmmState bool | |
27 | 48 | |
28 | // Check XGETBV, OSXSAVE and AVX bits | |
29 | if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { | |
30 | // Check for OS support | |
31 | eax, _ := xgetbv(0) | |
32 | return (eax & 0x6) == 0x6 | |
33 | } | |
34 | return false | |
35 | } | |
36 | ||
37 | // haveAVX2 returns true when there is AVX2 support | |
38 | func haveAVX2() bool { | |
39 | 49 | mfi, _, _, _ := cpuid(0) |
40 | 50 | |
41 | // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. | |
42 | if mfi >= 7 && haveAVX() { | |
43 | _, ebx, _, _ := cpuidex(7, 0) | |
44 | return (ebx & 0x00000020) != 0 | |
51 | if mfi >= 1 { | |
52 | _, _, c, d := cpuid(1) | |
53 | ||
54 | sse = (d & (1 << 25)) != 0 | |
55 | sse2 = (d & (1 << 26)) != 0 | |
56 | sse3 = (c & (1 << 0)) != 0 | |
57 | ssse3 = (c & (1 << 9)) != 0 | |
58 | sse41 = (c & (1 << 19)) != 0 | |
59 | sse42 = (c & (1 << 20)) != 0 | |
60 | popcnt = (c & (1 << 23)) != 0 | |
61 | _xsave = (c & (1 << 26)) != 0 | |
62 | _osxsave = (c & (1 << 27)) != 0 | |
63 | _avx = (c & (1 << 28)) != 0 | |
45 | 64 | } |
46 | return false | |
65 | ||
66 | if mfi >= 7 { | |
67 | _, b, _, _ := cpuid(7) | |
68 | ||
69 | _avx2 = (b & (1 << 5)) != 0 | |
70 | _avx512f = (b & (1 << 16)) != 0 | |
71 | _avx512dq = (b & (1 << 17)) != 0 | |
72 | // _avx512pf = (b & (1 << 26)) != 0 | |
73 | // _avx512er = (b & (1 << 27)) != 0 | |
74 | // _avx512cd = (b & (1 << 28)) != 0 | |
75 | _avx512bw = (b & (1 << 30)) != 0 | |
76 | _avx512vl = (b & (1 << 31)) != 0 | |
77 | sha = (b & (1 << 29)) != 0 | |
78 | } | |
79 | ||
80 | // Stop here if XSAVE unsupported or not enabled | |
81 | if !_xsave || !_osxsave { | |
82 | return | |
83 | } | |
84 | ||
85 | if _xsave && _osxsave { | |
86 | a, _ := xgetbv(0) | |
87 | ||
88 | _sseState = (a & (1 << 1)) != 0 | |
89 | _avxState = (a & (1 << 2)) != 0 | |
90 | _opmaskState = (a & (1 << 5)) != 0 | |
91 | _zmmHI256State = (a & (1 << 6)) != 0 | |
92 | _hi16ZmmState = (a & (1 << 7)) != 0 | |
93 | } else { | |
94 | _sseState = true | |
95 | } | |
96 | ||
97 | // Very unlikely that OS would enable XSAVE and then disable SSE | |
98 | if !_sseState { | |
99 | sse = false | |
100 | sse2 = false | |
101 | sse3 = false | |
102 | ssse3 = false | |
103 | sse41 = false | |
104 | sse42 = false | |
105 | } | |
106 | ||
107 | if _avxState { | |
108 | avx = _avx | |
109 | avx2 = _avx2 | |
110 | } | |
111 | ||
112 | if _opmaskState && _zmmHI256State && _hi16ZmmState { | |
113 | avx512 = (_avx512f && | |
114 | _avx512dq && | |
115 | _avx512bw && | |
116 | _avx512vl) | |
117 | } | |
47 | 118 | } |
48 | ||
49 | // haveAVX512 returns true when there is AVX512 support | |
50 | func haveAVX512() bool { | |
51 | mfi, _, _, _ := cpuid(0) | |
52 | ||
53 | // AVX-512 feature bits are reported by CPUID leaf 7, so that leaf must be supported. | |
54 | if mfi >= 7 { | |
55 | _, _, c, _ := cpuid(1) | |
56 | ||
57 | // Only detect AVX-512 features if XGETBV is supported | |
58 | if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { | |
59 | // Check for OS support | |
60 | eax, _ := xgetbv(0) | |
61 | _, ebx, _, _ := cpuidex(7, 0) | |
62 | ||
63 | // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and | |
64 | // ZMM16-ZMM31 state are enabled by OS) | |
65 | // and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). | |
66 | if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { | |
67 | if ebx&(1<<16) == 0 { | |
68 | return false // no AVX512F | |
69 | } | |
70 | if ebx&(1<<17) == 0 { | |
71 | return false // no AVX512DQ | |
72 | } | |
73 | if ebx&(1<<30) == 0 { | |
74 | return false // no AVX512BW | |
75 | } | |
76 | if ebx&(1<<31) == 0 { | |
77 | return false // no AVX512VL | |
78 | } | |
79 | return true | |
80 | } | |
81 | } | |
82 | } | |
83 | return false | |
84 | } | |
85 | ||
86 | // haveSSSE3 returns true when there is SSSE3 support | |
87 | func haveSSSE3() bool { | |
88 | ||
89 | _, _, c, _ := cpuid(1) | |
90 | ||
91 | return (c & 0x00000200) != 0 | |
92 | } |
23 | 23 | |
24 | 24 | // func cpuid(op uint32) (eax, ebx, ecx, edx uint32) |
25 | 25 | TEXT ·cpuid(SB), 7, $0 |
26 | XORL CX, CX | |
27 | MOVL op+0(FP), AX | |
28 | CPUID | |
29 | MOVL AX, eax+4(FP) | |
30 | MOVL BX, ebx+8(FP) | |
31 | MOVL CX, ecx+12(FP) | |
32 | MOVL DX, edx+16(FP) | |
33 | RET | |
26 | XORL CX, CX | |
27 | MOVL op+0(FP), AX | |
28 | CPUID | |
29 | MOVL AX, eax+4(FP) | |
30 | MOVL BX, ebx+8(FP) | |
31 | MOVL CX, ecx+12(FP) | |
32 | MOVL DX, edx+16(FP) | |
33 | RET | |
34 | 34 | |
35 | 35 | // func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) |
36 | 36 | TEXT ·cpuidex(SB), 7, $0 |
37 | MOVL op+0(FP), AX | |
38 | MOVL op2+4(FP), CX | |
39 | CPUID | |
40 | MOVL AX, eax+8(FP) | |
41 | MOVL BX, ebx+12(FP) | |
42 | MOVL CX, ecx+16(FP) | |
43 | MOVL DX, edx+20(FP) | |
44 | RET | |
37 | MOVL op+0(FP), AX | |
38 | MOVL op2+4(FP), CX | |
39 | CPUID | |
40 | MOVL AX, eax+8(FP) | |
41 | MOVL BX, ebx+12(FP) | |
42 | MOVL CX, ecx+16(FP) | |
43 | MOVL DX, edx+20(FP) | |
44 | RET | |
45 | 45 | |
46 | 46 | // func xgetbv(index uint32) (eax, edx uint32) |
47 | 47 | TEXT ·xgetbv(SB), 7, $0 |
48 | MOVL index+0(FP), CX | |
49 | BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV | |
50 | MOVL AX, eax+4(FP) | |
51 | MOVL DX, edx+8(FP) | |
52 | RET | |
48 | MOVL index+0(FP), CX | |
49 | BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV | |
50 | MOVL AX, eax+4(FP) | |
51 | MOVL DX, edx+8(FP) | |
52 | RET |
23 | 23 | |
24 | 24 | // func cpuid(op uint32) (eax, ebx, ecx, edx uint32) |
25 | 25 | TEXT ·cpuid(SB), 7, $0 |
26 | XORQ CX, CX | |
27 | MOVL op+0(FP), AX | |
28 | CPUID | |
29 | MOVL AX, eax+8(FP) | |
30 | MOVL BX, ebx+12(FP) | |
31 | MOVL CX, ecx+16(FP) | |
32 | MOVL DX, edx+20(FP) | |
33 | RET | |
34 | ||
26 | XORQ CX, CX | |
27 | MOVL op+0(FP), AX | |
28 | CPUID | |
29 | MOVL AX, eax+8(FP) | |
30 | MOVL BX, ebx+12(FP) | |
31 | MOVL CX, ecx+16(FP) | |
32 | MOVL DX, edx+20(FP) | |
33 | RET | |
35 | 34 | |
36 | 35 | // func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) |
37 | 36 | TEXT ·cpuidex(SB), 7, $0 |
38 | MOVL op+0(FP), AX | |
39 | MOVL op2+4(FP), CX | |
40 | CPUID | |
41 | MOVL AX, eax+8(FP) | |
42 | MOVL BX, ebx+12(FP) | |
43 | MOVL CX, ecx+16(FP) | |
44 | MOVL DX, edx+20(FP) | |
45 | RET | |
37 | MOVL op+0(FP), AX | |
38 | MOVL op2+4(FP), CX | |
39 | CPUID | |
40 | MOVL AX, eax+8(FP) | |
41 | MOVL BX, ebx+12(FP) | |
42 | MOVL CX, ecx+16(FP) | |
43 | MOVL DX, edx+20(FP) | |
44 | RET | |
46 | 45 | |
47 | 46 | // func xgetbv(index uint32) (eax, edx uint32) |
48 | 47 | TEXT ·xgetbv(SB), 7, $0 |
49 | MOVL index+0(FP), CX | |
50 | BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV | |
51 | MOVL AX, eax+8(FP) | |
52 | MOVL DX, edx+12(FP) | |
53 | RET | |
48 | MOVL index+0(FP), CX | |
49 | BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV | |
50 | MOVL AX, eax+8(FP) | |
51 | MOVL DX, edx+12(FP) | |
52 | RET |
17 | 17 | |
18 | 18 | import ( |
19 | 19 | "crypto/sha256" |
20 | "encoding/binary" | |
20 | 21 | "hash" |
21 | 22 | "runtime" |
22 | 23 | ) |
28 | 29 | const BlockSize = 64 |
29 | 30 | |
30 | 31 | const ( |
31 | chunk = 64 | |
32 | chunk = BlockSize | |
32 | 33 | init0 = 0x6A09E667 |
33 | 34 | init1 = 0xBB67AE85 |
34 | 35 | init2 = 0x3C6EF372 |
61 | 62 | d.len = 0 |
62 | 63 | } |
63 | 64 | |
65 | type blockfuncType int | |
66 | ||
67 | const ( | |
68 | blockfuncGeneric blockfuncType = iota | |
69 | blockfuncAvx512 blockfuncType = iota | |
70 | blockfuncAvx2 blockfuncType = iota | |
71 | blockfuncAvx blockfuncType = iota | |
72 | blockfuncSsse blockfuncType = iota | |
73 | blockfuncSha blockfuncType = iota | |
74 | blockfuncArm blockfuncType = iota | |
75 | ) | |
76 | ||
77 | var blockfunc blockfuncType | |
78 | ||
64 | 79 | func block(dig *digest, p []byte) { |
80 | if blockfunc == blockfuncSha { | |
81 | blockShaGo(dig, p) | |
82 | } else if blockfunc == blockfuncAvx2 { | |
83 | blockAvx2Go(dig, p) | |
84 | } else if blockfunc == blockfuncAvx { | |
85 | blockAvxGo(dig, p) | |
86 | } else if blockfunc == blockfuncSsse { | |
87 | blockSsseGo(dig, p) | |
88 | } else if blockfunc == blockfuncArm { | |
89 | blockArmGo(dig, p) | |
90 | } else if blockfunc == blockfuncGeneric { | |
91 | blockGeneric(dig, p) | |
92 | } | |
93 | } | |
94 | ||
95 | func init() { | |
65 | 96 | is386bit := runtime.GOARCH == "386" |
66 | 97 | isARM := runtime.GOARCH == "arm" |
67 | if is386bit || isARM { | |
68 | blockGeneric(dig, p) | |
69 | } | |
70 | switch !is386bit && !isARM { | |
98 | switch { | |
99 | case is386bit || isARM: | |
100 | blockfunc = blockfuncGeneric | |
101 | case sha && ssse3 && sse41: | |
102 | blockfunc = blockfuncSha | |
71 | 103 | case avx2: |
72 | blockAvx2Go(dig, p) | |
104 | blockfunc = blockfuncAvx2 | |
73 | 105 | case avx: |
74 | blockAvxGo(dig, p) | |
106 | blockfunc = blockfuncAvx | |
75 | 107 | case ssse3: |
76 | blockSsseGo(dig, p) | |
108 | blockfunc = blockfuncSsse | |
77 | 109 | case armSha: |
78 | blockArmGo(dig, p) | |
110 | blockfunc = blockfuncArm | |
79 | 111 | default: |
80 | blockGeneric(dig, p) | |
112 | blockfunc = blockfuncGeneric | |
81 | 113 | } |
82 | 114 | } |
83 | 115 | |
84 | 116 | // New returns a new hash.Hash computing the SHA256 checksum. |
85 | 117 | func New() hash.Hash { |
86 | if avx2 || avx || ssse3 || armSha { | |
118 | if blockfunc != blockfuncGeneric { | |
87 | 119 | d := new(digest) |
88 | 120 | d.Reset() |
89 | 121 | return d |
94 | 126 | } |
95 | 127 | |
96 | 128 | // Sum256 - single caller sha256 helper |
97 | func Sum256(data []byte) [Size]byte { | |
129 | func Sum256(data []byte) (result [Size]byte) { | |
98 | 130 | var d digest |
99 | 131 | d.Reset() |
100 | 132 | d.Write(data) |
101 | return d.checkSum() | |
133 | result = d.checkSum() | |
134 | return | |
102 | 135 | } |
103 | 136 | |
104 | 137 | // Return size of checksum |
140 | 173 | } |
141 | 174 | |
142 | 175 | // Intermediate checksum function |
143 | func (d *digest) checkSum() [Size]byte { | |
144 | len := d.len | |
145 | // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. | |
146 | var tmp [64]byte | |
147 | tmp[0] = 0x80 | |
148 | if len%64 < 56 { | |
149 | d.Write(tmp[0 : 56-len%64]) | |
150 | } else { | |
151 | d.Write(tmp[0 : 64+56-len%64]) | |
152 | } | |
153 | ||
154 | // Length in bits. | |
155 | len <<= 3 | |
156 | for i := uint(0); i < 8; i++ { | |
157 | tmp[i] = byte(len >> (56 - 8*i)) | |
158 | } | |
159 | d.Write(tmp[0:8]) | |
160 | ||
161 | if d.nx != 0 { | |
162 | panic("d.nx != 0") | |
163 | } | |
164 | ||
165 | h := d.h[:] | |
166 | ||
167 | var digest [Size]byte | |
168 | for i, s := range h { | |
169 | digest[i*4] = byte(s >> 24) | |
170 | digest[i*4+1] = byte(s >> 16) | |
171 | digest[i*4+2] = byte(s >> 8) | |
172 | digest[i*4+3] = byte(s) | |
173 | } | |
174 | ||
175 | return digest | |
176 | } | |
176 | func (d *digest) checkSum() (digest [Size]byte) { | |
177 | n := d.nx | |
178 | ||
179 | var k [64]byte | |
180 | copy(k[:], d.x[:n]) | |
181 | ||
182 | k[n] = 0x80 | |
183 | ||
184 | if n >= 56 { | |
185 | block(d, k[:]) | |
186 | ||
187 | // clear block buffer - go compiles this to optimal 1x xorps + 4x movups | |
188 | // unfortunately expressing this more succinctly results in much worse code | |
189 | k[0] = 0 | |
190 | k[1] = 0 | |
191 | k[2] = 0 | |
192 | k[3] = 0 | |
193 | k[4] = 0 | |
194 | k[5] = 0 | |
195 | k[6] = 0 | |
196 | k[7] = 0 | |
197 | k[8] = 0 | |
198 | k[9] = 0 | |
199 | k[10] = 0 | |
200 | k[11] = 0 | |
201 | k[12] = 0 | |
202 | k[13] = 0 | |
203 | k[14] = 0 | |
204 | k[15] = 0 | |
205 | k[16] = 0 | |
206 | k[17] = 0 | |
207 | k[18] = 0 | |
208 | k[19] = 0 | |
209 | k[20] = 0 | |
210 | k[21] = 0 | |
211 | k[22] = 0 | |
212 | k[23] = 0 | |
213 | k[24] = 0 | |
214 | k[25] = 0 | |
215 | k[26] = 0 | |
216 | k[27] = 0 | |
217 | k[28] = 0 | |
218 | k[29] = 0 | |
219 | k[30] = 0 | |
220 | k[31] = 0 | |
221 | k[32] = 0 | |
222 | k[33] = 0 | |
223 | k[34] = 0 | |
224 | k[35] = 0 | |
225 | k[36] = 0 | |
226 | k[37] = 0 | |
227 | k[38] = 0 | |
228 | k[39] = 0 | |
229 | k[40] = 0 | |
230 | k[41] = 0 | |
231 | k[42] = 0 | |
232 | k[43] = 0 | |
233 | k[44] = 0 | |
234 | k[45] = 0 | |
235 | k[46] = 0 | |
236 | k[47] = 0 | |
237 | k[48] = 0 | |
238 | k[49] = 0 | |
239 | k[50] = 0 | |
240 | k[51] = 0 | |
241 | k[52] = 0 | |
242 | k[53] = 0 | |
243 | k[54] = 0 | |
244 | k[55] = 0 | |
245 | k[56] = 0 | |
246 | k[57] = 0 | |
247 | k[58] = 0 | |
248 | k[59] = 0 | |
249 | k[60] = 0 | |
250 | k[61] = 0 | |
251 | k[62] = 0 | |
252 | k[63] = 0 | |
253 | } | |
254 | binary.BigEndian.PutUint64(k[56:64], uint64(d.len)<<3) | |
255 | block(d, k[:]) | |
256 | ||
257 | { | |
258 | const i = 0 | |
259 | binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) | |
260 | } | |
261 | { | |
262 | const i = 1 | |
263 | binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) | |
264 | } | |
265 | { | |
266 | const i = 2 | |
267 | binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) | |
268 | } | |
269 | { | |
270 | const i = 3 | |
271 | binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) | |
272 | } | |
273 | { | |
274 | const i = 4 | |
275 | binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) | |
276 | } | |
277 | { | |
278 | const i = 5 | |
279 | binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) | |
280 | } | |
281 | { | |
282 | const i = 6 | |
283 | binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) | |
284 | } | |
285 | { | |
286 | const i = 7 | |
287 | binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) | |
288 | } | |
289 | ||
290 | return | |
291 | } |
2207 | 2207 | } |
2208 | 2208 | |
2209 | 2209 | func TestGolden(t *testing.T) { |
2210 | blockfuncSaved := blockfunc | |
2211 | ||
2212 | if sha && ssse3 && sse41 { | |
2213 | blockfunc = blockfuncSha | |
2214 | for _, g := range golden { | |
2215 | s := fmt.Sprintf("%x", Sum256([]byte(g.in))) | |
2216 | if Sum256([]byte(g.in)) != g.out { | |
2217 | t.Fatalf("SHA: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) | |
2218 | } | |
2219 | } | |
2220 | } | |
2210 | 2221 | if avx2 { |
2222 | blockfunc = blockfuncAvx2 | |
2211 | 2223 | for _, g := range golden { |
2212 | 2224 | s := fmt.Sprintf("%x", Sum256([]byte(g.in))) |
2213 | 2225 | if Sum256([]byte(g.in)) != g.out { |
2214 | 2226 | t.Fatalf("AVX2: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) |
2215 | 2227 | } |
2216 | 2228 | } |
2217 | avx2 = false | |
2218 | 2229 | } |
2219 | 2230 | if avx { |
2231 | blockfunc = blockfuncAvx | |
2220 | 2232 | for _, g := range golden { |
2221 | 2233 | s := fmt.Sprintf("%x", Sum256([]byte(g.in))) |
2222 | 2234 | if Sum256([]byte(g.in)) != g.out { |
2223 | 2235 | t.Fatalf("AVX: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) |
2224 | 2236 | } |
2225 | 2237 | } |
2226 | avx = false | |
2227 | 2238 | } |
2228 | 2239 | if ssse3 { |
2240 | blockfunc = blockfuncSsse | |
2229 | 2241 | for _, g := range golden { |
2230 | 2242 | s := fmt.Sprintf("%x", Sum256([]byte(g.in))) |
2231 | 2243 | if Sum256([]byte(g.in)) != g.out { |
2233 | 2245 | } |
2234 | 2246 | } |
2235 | 2247 | } |
2248 | if true { | |
2249 | blockfunc = blockfuncGeneric | |
2250 | for _, g := range golden { | |
2251 | s := fmt.Sprintf("%x", Sum256([]byte(g.in))) | |
2252 | if Sum256([]byte(g.in)) != g.out { | |
2253 | t.Fatalf("Generic: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) | |
2254 | } | |
2255 | } | |
2256 | } | |
2257 | ||
2258 | blockfunc = blockfuncSaved | |
2236 | 2259 | } |
2237 | 2260 | |
2238 | 2261 | func TestSize(t *testing.T) { |
2254 | 2277 | var buf = make([]byte, size) |
2255 | 2278 | b.SetBytes(int64(size)) |
2256 | 2279 | sum := make([]byte, bench.Size()) |
2280 | b.ResetTimer() | |
2257 | 2281 | for i := 0; i < b.N; i++ { |
2258 | 2282 | bench.Reset() |
2259 | 2283 | bench.Write(buf[:size]) |
2261 | 2285 | } |
2262 | 2286 | } |
2263 | 2287 | |
2264 | func BenchmarkHash8Bytes(b *testing.B) { benchmarkSize(b, 8) } | |
2265 | func BenchmarkHash1K(b *testing.B) { benchmarkSize(b, 1024) } | |
2266 | func BenchmarkHash8K(b *testing.B) { benchmarkSize(b, 8192) } | |
2267 | func BenchmarkHash1MAvx2(b *testing.B) { benchmarkSize(b, 1024*1024) } | |
2268 | func BenchmarkHash5MAvx2(b *testing.B) { benchmarkSize(b, 5*1024*1024) } | |
2269 | func BenchmarkHash10MAvx2(b *testing.B) { benchmarkSize(b, 10*1024*1024) } | |
2288 | func BenchmarkHash(b *testing.B) { | |
2289 | algos := []struct { | |
2290 | n string | |
2291 | t blockfuncType | |
2292 | f bool | |
2293 | }{ | |
2294 | {"SHA_", blockfuncSha, sha && sse41 && ssse3}, | |
2295 | {"AVX2", blockfuncAvx2, avx2}, | |
2296 | {"AVX_", blockfuncAvx, avx}, | |
2297 | {"SSSE", blockfuncSsse, ssse3}, | |
2298 | {"GEN_", blockfuncGeneric, true}, | |
2299 | } | |
2300 | ||
2301 | sizes := []struct { | |
2302 | n string | |
2303 | f func(*testing.B, int) | |
2304 | s int | |
2305 | }{ | |
2306 | {"8Bytes", benchmarkSize, 1 << 3}, | |
2307 | {"1K", benchmarkSize, 1 << 10}, | |
2308 | {"8K", benchmarkSize, 1 << 13}, | |
2309 | {"1M", benchmarkSize, 1 << 20}, | |
2310 | {"5M", benchmarkSize, 5 << 20}, | |
2311 | {"10M", benchmarkSize, 5 << 21}, | |
2312 | } | |
2313 | ||
2314 | for _, a := range algos { | |
2315 | if a.f { | |
2316 | blockfuncSaved := blockfunc | |
2317 | blockfunc = a.t | |
2318 | for _, y := range sizes { | |
2319 | s := a.n + "/" + y.n | |
2320 | b.Run(s, func(b *testing.B) { y.f(b, y.s) }) | |
2321 | } | |
2322 | blockfunc = blockfuncSaved | |
2323 | } | |
2324 | } | |
2325 | } |
116 | 116 | // func blockAvx2(h []uint32, message []uint8) |
117 | 117 | TEXT ·blockAvx2(SB), 7, $0 |
118 | 118 | |
119 | MOVQ ctx+0(FP), DI // DI: &h | |
120 | MOVQ inp+24(FP), SI // SI: &message | |
121 | MOVQ inplength+32(FP), DX // len(message) | |
122 | ADDQ SI, DX // end pointer of input | |
123 | MOVQ SP, R11 // copy stack pointer | |
124 | SUBQ $0x220, SP // sp -= 0x220 | |
125 | ANDQ $0xfffffffffffffc00, SP // align stack frame | |
126 | ADDQ $0x1c0, SP | |
127 | MOVQ DI, 0x40(SP) // save ctx | |
128 | MOVQ SI, 0x48(SP) // save input | |
129 | MOVQ DX, 0x50(SP) // save end pointer | |
130 | MOVQ R11, 0x58(SP) // save copy of stack pointer | |
131 | ||
132 | WORD $0xf8c5; BYTE $0x77 // vzeroupper | |
133 | ADDQ $0x40, SI // input++ | |
134 | MOVL (DI), AX | |
135 | MOVQ SI, R12 // borrow $T1 | |
136 | MOVL 4(DI), BX | |
137 | CMPQ SI, DX // $_end | |
138 | MOVL 8(DI), CX | |
139 | LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */ | |
140 | MOVL 12(DI), DX | |
141 | MOVL 16(DI), R8 | |
142 | MOVL 20(DI), R9 | |
143 | MOVL 24(DI), R10 | |
144 | MOVL 28(DI), R11 | |
145 | ||
146 | LEAQ K256<>(SB), BP | |
147 | LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rip+0x220] */ | |
148 | LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rip+0x240] */ | |
149 | LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm7,YMMWORD PTR [rip+0x200] */ | |
119 | MOVQ ctx+0(FP), DI // DI: &h | |
120 | MOVQ inp+24(FP), SI // SI: &message | |
121 | MOVQ inplength+32(FP), DX // len(message) | |
122 | ADDQ SI, DX // end pointer of input | |
123 | MOVQ SP, R11 // copy stack pointer | |
124 | SUBQ $0x220, SP // sp -= 0x220 | |
125 | ANDQ $0xfffffffffffffc00, SP // align stack frame | |
126 | ADDQ $0x1c0, SP | |
127 | MOVQ DI, 0x40(SP) // save ctx | |
128 | MOVQ SI, 0x48(SP) // save input | |
129 | MOVQ DX, 0x50(SP) // save end pointer | |
130 | MOVQ R11, 0x58(SP) // save copy of stack pointer | |
131 | ||
132 | WORD $0xf8c5; BYTE $0x77 // vzeroupper | |
133 | ADDQ $0x40, SI // input++ | |
134 | MOVL (DI), AX | |
135 | MOVQ SI, R12 // borrow $T1 | |
136 | MOVL 4(DI), BX | |
137 | CMPQ SI, DX // $_end | |
138 | MOVL 8(DI), CX | |
139 | LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */ | |
140 | MOVL 12(DI), DX | |
141 | MOVL 16(DI), R8 | |
142 | MOVL 20(DI), R9 | |
143 | MOVL 24(DI), R10 | |
144 | MOVL 28(DI), R11 | |
145 | ||
146 | LEAQ K256<>(SB), BP | |
147 | LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rip+0x220] */ | |
148 | LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rip+0x240] */ | |
149 | LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm7,YMMWORD PTR [rip+0x200] */ | |
150 | 150 | |
151 | 151 | loop0: |
152 | LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10 | |
153 | ||
154 | // Load first 16 dwords from two blocks | |
155 | MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40] | |
156 | MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30] | |
157 | MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20] | |
158 | MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10] | |
159 | ||
160 | // Byte swap data and transpose data into high/low | |
161 | LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1 | |
162 | LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1 | |
163 | LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7 | |
164 | LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1 | |
165 | LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7 | |
166 | LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1 | |
167 | ||
168 | LEAQ K256<>(SB), BP | |
169 | LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7 | |
170 | LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp] | |
171 | LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7 | |
172 | LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp] | |
173 | LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp] | |
174 | LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp] | |
175 | ||
176 | LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4 | |
177 | XORQ R14, R14 | |
178 | LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5 | |
179 | ||
180 | ADDQ $-0x40, SP | |
181 | MOVQ BX, DI | |
182 | LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 | |
183 | XORQ CX, DI // magic | |
184 | LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7 | |
185 | MOVQ R9, R12 | |
186 | ADDQ $0x80,BP | |
152 | LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10 | |
153 | ||
154 | // Load first 16 dwords from two blocks | |
155 | MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40] | |
156 | MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30] | |
157 | MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20] | |
158 | MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10] | |
159 | ||
160 | // Byte swap data and transpose data into high/low | |
161 | LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1 | |
162 | LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1 | |
163 | LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7 | |
164 | LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1 | |
165 | LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7 | |
166 | LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1 | |
167 | ||
168 | LEAQ K256<>(SB), BP | |
169 | LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7 | |
170 | LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp] | |
171 | LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7 | |
172 | LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp] | |
173 | LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp] | |
174 | LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp] | |
175 | ||
176 | LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4 | |
177 | XORQ R14, R14 | |
178 | LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5 | |
179 | ||
180 | ADDQ $-0x40, SP | |
181 | MOVQ BX, DI | |
182 | LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 | |
183 | XORQ CX, DI // magic | |
184 | LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7 | |
185 | MOVQ R9, R12 | |
186 | ADDQ $0x80, BP | |
187 | 187 | |
188 | 188 | loop1: |
189 | // Schedule 48 input dwords, by doing 3 rounds of 12 each | |
190 | // Note: SIMD instructions are interleaved with the SHA calculations | |
191 | ADDQ $-0x40, SP | |
192 | LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4 | |
193 | ||
194 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) | |
195 | LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] | |
196 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
197 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
198 | LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4 | |
199 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
200 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
201 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
202 | LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 | |
203 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
204 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
205 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
206 | LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7 | |
207 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
208 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
209 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
210 | LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 | |
211 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
212 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
213 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
214 | LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe | |
215 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
216 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
217 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
218 | LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 | |
219 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
220 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
221 | WORD $0xdf31 // xor edi,ebx | |
222 | LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa | |
223 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
224 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
225 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
226 | LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb | |
227 | ||
228 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) | |
229 | LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] | |
230 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
231 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
232 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
233 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
234 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
235 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
236 | LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb | |
237 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
238 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
239 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
240 | LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 | |
241 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
242 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
243 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
244 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
245 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
246 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
247 | WORD $0xc731 // xor edi,eax | |
248 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
249 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
250 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
251 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
252 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
253 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
254 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
255 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
256 | LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4 | |
257 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
258 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
259 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
260 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
261 | ||
262 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) | |
263 | LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] | |
264 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
265 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
266 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
267 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
268 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
269 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
270 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
271 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
272 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
273 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
274 | LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 | |
275 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
276 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
277 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
278 | LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 | |
279 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
280 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
281 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
282 | LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50 | |
283 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
284 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
285 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
286 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
287 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
288 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
289 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
290 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
291 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
292 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
293 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
294 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
295 | ||
296 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) | |
297 | LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] | |
298 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
299 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
300 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
301 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
302 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
303 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
304 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
305 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
306 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
307 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
308 | LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 | |
309 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
310 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
311 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
312 | LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 | |
313 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
314 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
315 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
316 | LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0] | |
317 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
318 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
319 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
320 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
321 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
322 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
323 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
324 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
325 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
326 | ||
327 | LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 | |
328 | LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4 | |
329 | ||
330 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) | |
331 | LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] | |
332 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
333 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
334 | LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4 | |
335 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
336 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
337 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
338 | LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 | |
339 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
340 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
341 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
342 | LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7 | |
343 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
344 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
345 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
346 | LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 | |
347 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
348 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
349 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
350 | LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe | |
351 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
352 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
353 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
354 | LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 | |
355 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
356 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
357 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
358 | LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa | |
359 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
360 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
361 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
362 | LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb | |
363 | ||
364 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) | |
365 | LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] | |
366 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
367 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
368 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
369 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
370 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
371 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
372 | LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb | |
373 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
374 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
375 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
376 | LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 | |
377 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
378 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
379 | WORD $0xd789 // mov edi,edx | |
380 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
381 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
382 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
383 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
384 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
385 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
386 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
387 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
388 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
389 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
390 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
391 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
392 | LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4 | |
393 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
394 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
395 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
396 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
397 | ||
398 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) | |
399 | LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] | |
400 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
401 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
402 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
403 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
404 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
405 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
406 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
407 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
408 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
409 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
410 | LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 | |
411 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
412 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
413 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
414 | LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 | |
415 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
416 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
417 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
418 | LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50 | |
419 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
420 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
421 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
422 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
423 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
424 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
425 | WORD $0xd731 // xor edi,edx | |
426 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
427 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
428 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
429 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
430 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
431 | ||
432 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) | |
433 | LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] | |
434 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
435 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
436 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
437 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
438 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
439 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
440 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
441 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
442 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
443 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
444 | LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 | |
445 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
446 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
447 | WORD $0xdf89 // mov edi,ebx | |
448 | LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 | |
449 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
450 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
451 | WORD $0xcf31 // xor edi,ecx | |
452 | LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20] | |
453 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
454 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
455 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
456 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
457 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
458 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
459 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
460 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
461 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
462 | ||
463 | LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 | |
464 | ||
465 | LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40] | |
466 | LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4 | |
467 | ||
468 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) | |
469 | LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] | |
470 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
471 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
472 | LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4 | |
473 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
474 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
475 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
476 | LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 | |
477 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
478 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
479 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
480 | LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7 | |
481 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
482 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
483 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
484 | LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 | |
485 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
486 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
487 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
488 | LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe | |
489 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
490 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
491 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
492 | LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 | |
493 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
494 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
495 | WORD $0xdf31 // xor edi,ebx | |
496 | LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa | |
497 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
498 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
499 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
500 | LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb | |
501 | ||
502 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) | |
503 | LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] | |
504 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
505 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
506 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
507 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
508 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
509 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
510 | LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb | |
511 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
512 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
513 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
514 | LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 | |
515 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
516 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
517 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
518 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
519 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
520 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
521 | WORD $0xc731 // xor edi,eax | |
522 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
523 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
524 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
525 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
526 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
527 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
528 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
529 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
530 | LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4 | |
531 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
532 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
533 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
534 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
535 | ||
536 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) | |
537 | LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] | |
538 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
539 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
540 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
541 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
542 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
543 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
544 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
545 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
546 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
547 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
548 | LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 | |
549 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
550 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
551 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
552 | LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 | |
553 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
554 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
555 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
556 | LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50 | |
557 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
558 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
559 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
560 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
561 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
562 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
563 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
564 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
565 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
566 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
567 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
568 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
569 | ||
570 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) | |
571 | LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] | |
572 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
573 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
574 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
575 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
576 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
577 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
578 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
579 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
580 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
581 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
582 | LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 | |
583 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
584 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
585 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
586 | LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 | |
587 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
588 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
589 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
590 | LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40] | |
591 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
592 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
593 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
594 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
595 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
596 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
597 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
598 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
599 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
600 | ||
601 | LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 | |
602 | LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4 | |
603 | ||
604 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) | |
605 | LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] | |
606 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
607 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
608 | LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4 | |
609 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
610 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
611 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
612 | LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 | |
613 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
614 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
615 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
616 | LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7 | |
617 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
618 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
619 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
620 | LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 | |
621 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
622 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
623 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
624 | LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe | |
625 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
626 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
627 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
628 | LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 | |
629 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
630 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
631 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
632 | LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa | |
633 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
634 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
635 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
636 | LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb | |
637 | ||
638 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) | |
639 | LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] | |
640 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
641 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
642 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
643 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
644 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
645 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
646 | LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb | |
647 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
648 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
649 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
650 | LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 | |
651 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
652 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
653 | WORD $0xd789 // mov edi,edx | |
654 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
655 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
656 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
657 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
658 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
659 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
660 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
661 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
662 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
663 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
664 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
665 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
666 | LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4 | |
667 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
668 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
669 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
670 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
671 | ||
672 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) | |
673 | LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] | |
674 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
675 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
676 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
677 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
678 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
679 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
680 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
681 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
682 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
683 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
684 | LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 | |
685 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
686 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
687 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
688 | LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 | |
689 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
690 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
691 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
692 | LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50 | |
693 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
694 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
695 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
696 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
697 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
698 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
699 | WORD $0xd731 // xor edi,edx | |
700 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
701 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
702 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
703 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
704 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
705 | ||
706 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) | |
707 | LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] | |
708 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
709 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
710 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
711 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
712 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
713 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
714 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
715 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
716 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
717 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
718 | LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 | |
719 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
720 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
721 | WORD $0xdf89 // mov edi,ebx | |
722 | LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 | |
723 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
724 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
725 | WORD $0xcf31 // xor edi,ecx | |
726 | LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60] | |
727 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
728 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
729 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
730 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
731 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
732 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
733 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
734 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
735 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
736 | ||
737 | LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 | |
738 | ADDQ $0x80, BP | |
739 | ||
740 | CMPB 0x3(BP),$0x0 | |
741 | JNE loop1 | |
742 | ||
743 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40) | |
744 | LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40] | |
745 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
746 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
747 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
748 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
749 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
750 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
751 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
752 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
753 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
754 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
755 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
756 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
757 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
758 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
759 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
760 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
761 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
762 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
763 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
764 | WORD $0xdf31 // xor edi,ebx | |
765 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
766 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
767 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
768 | ||
769 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44) | |
770 | LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44] | |
771 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
772 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
773 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
774 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
775 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
776 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
777 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
778 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
779 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
780 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
781 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
782 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
783 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
784 | WORD $0xc731 // xor edi,eax | |
785 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
786 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
787 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
788 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
789 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
790 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
791 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
792 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
793 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
794 | ||
795 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48) | |
796 | LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48] | |
797 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
798 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
799 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
800 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
801 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
802 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
803 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
804 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
805 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
806 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
807 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
808 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
809 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
810 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
811 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
812 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
813 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
814 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
815 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
816 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
817 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
818 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
819 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
820 | ||
821 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c) | |
822 | LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c] | |
823 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
824 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
825 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
826 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
827 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
828 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
829 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
830 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
831 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
832 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
833 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
834 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
835 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
836 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
837 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
838 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
839 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
840 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
841 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
842 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
843 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
844 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
845 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
846 | ||
847 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60) | |
848 | LONG $0x60245403 // add edx,[rsp+0x60] | |
849 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
850 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
851 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
852 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
853 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
854 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
855 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
856 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
857 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
858 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
859 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
860 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
861 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
862 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
863 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
864 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
865 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
866 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
867 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
868 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
869 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
870 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
871 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
872 | ||
873 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64) | |
874 | LONG $0x64244c03 // add ecx,[rsp+0x64] | |
875 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
876 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
877 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
878 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
879 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
880 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
881 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
882 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
883 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
884 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
885 | WORD $0xd789 // mov edi,edx | |
886 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
887 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
888 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
889 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
890 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
891 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
892 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
893 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
894 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
895 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
896 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
897 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
898 | ||
899 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68) | |
900 | LONG $0x68245c03 // add ebx,[rsp+0x68] | |
901 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
902 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
903 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
904 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
905 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
906 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
907 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
908 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
909 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
910 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
911 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
912 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
913 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
914 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
915 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
916 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
917 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
918 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
919 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
920 | WORD $0xd731 // xor edi,edx | |
921 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
922 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
923 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
924 | ||
925 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c) | |
926 | LONG $0x6c244403 // add eax,[rsp+0x6c] | |
927 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
928 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
929 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
930 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
931 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
932 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
933 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
934 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
935 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
936 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
937 | WORD $0xdf89 // mov edi,ebx | |
938 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
939 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
940 | WORD $0xcf31 // xor edi,ecx | |
941 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
942 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
943 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
944 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
945 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
946 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
947 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
948 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
949 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
950 | ||
951 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00) | |
952 | LONG $0x241c0344 // add r11d,[rsp] | |
953 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
954 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
955 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
956 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
957 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
958 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
959 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
960 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
961 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
962 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
963 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
964 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
965 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
966 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
967 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
968 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
969 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
970 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
971 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
972 | WORD $0xdf31 // xor edi,ebx | |
973 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
974 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
975 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
976 | ||
977 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04) | |
978 | LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4] | |
979 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
980 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
981 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
982 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
983 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
984 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
985 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
986 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
987 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
988 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
989 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
990 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
991 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
992 | WORD $0xc731 // xor edi,eax | |
993 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
994 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
995 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
996 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
997 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
998 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
999 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1000 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
1001 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
1002 | ||
1003 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08) | |
1004 | LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8] | |
1005 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
1006 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
1007 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
1008 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
1009 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
1010 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
1011 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1012 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
1013 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
1014 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1015 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
1016 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
1017 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
1018 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
1019 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
1020 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
1021 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
1022 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1023 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1024 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
1025 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1026 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
1027 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
1028 | ||
1029 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c) | |
1030 | LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc] | |
1031 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
1032 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
1033 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
1034 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
1035 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
1036 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
1037 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1038 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
1039 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
1040 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1041 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
1042 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
1043 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
1044 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
1045 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
1046 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
1047 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
1048 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1049 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1050 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
1051 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1052 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
1053 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
1054 | ||
1055 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20) | |
1056 | LONG $0x20245403 // add edx,[rsp+0x20] | |
1057 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
1058 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
1059 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
1060 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
1061 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
1062 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
1063 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1064 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
1065 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
1066 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1067 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
1068 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
1069 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
1070 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
1071 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
1072 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
1073 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
1074 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1075 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1076 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
1077 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1078 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
1079 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
1080 | ||
1081 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24) | |
1082 | LONG $0x24244c03 // add ecx,[rsp+0x24] | |
1083 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
1084 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
1085 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
1086 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
1087 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
1088 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
1089 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1090 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
1091 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
1092 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1093 | WORD $0xd789 // mov edi,edx | |
1094 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
1095 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
1096 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
1097 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
1098 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
1099 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
1100 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1101 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1102 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
1103 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1104 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
1105 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
1106 | ||
1107 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28) | |
1108 | LONG $0x28245c03 // add ebx,[rsp+0x28] | |
1109 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
1110 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
1111 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
1112 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
1113 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
1114 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
1115 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1116 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
1117 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
1118 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1119 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
1120 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
1121 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
1122 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
1123 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
1124 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
1125 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
1126 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1127 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1128 | WORD $0xd731 // xor edi,edx | |
1129 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1130 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
1131 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
1132 | ||
1133 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c) | |
1134 | LONG $0x2c244403 // add eax,[rsp+0x2c] | |
1135 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
1136 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
1137 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
1138 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
1139 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
1140 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
1141 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1142 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
1143 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
1144 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1145 | WORD $0xdf89 // mov edi,ebx | |
1146 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
1147 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
1148 | WORD $0xcf31 // xor edi,ecx | |
1149 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
1150 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
1151 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
1152 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1153 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1154 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
1155 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1156 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
1157 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
1158 | ||
1159 | MOVQ 0x200(SP), DI // $_ctx | |
1160 | ADDQ R14, AX | |
1161 | ||
1162 | LEAQ 0x1c0(SP), BP | |
1163 | ||
1164 | ADDL (DI), AX | |
1165 | ADDL 4(DI), BX | |
1166 | ADDL 8(DI), CX | |
1167 | ADDL 12(DI), DX | |
1168 | ADDL 16(DI), R8 | |
1169 | ADDL 20(DI), R9 | |
1170 | ADDL 24(DI), R10 | |
1171 | ADDL 28(DI), R11 | |
1172 | ||
1173 | MOVL AX, (DI) | |
1174 | MOVL BX, 4(DI) | |
1175 | MOVL CX, 8(DI) | |
1176 | MOVL DX, 12(DI) | |
1177 | MOVL R8, 16(DI) | |
1178 | MOVL R9, 20(DI) | |
1179 | MOVL R10, 24(DI) | |
1180 | MOVL R11, 28(DI) | |
1181 | ||
1182 | CMPQ SI, 0x50(BP) // $_end | |
1183 | JE done | |
1184 | ||
1185 | XORQ R14, R14 | |
1186 | MOVQ BX, DI | |
1187 | XORQ CX, DI // magic | |
1188 | MOVQ R9, R12 | |
189 | // Schedule 48 input dwords, by doing 3 rounds of 16 each | |
190 | // Note: SIMD instructions are interleaved with the SHA calculations | |
191 | ADDQ $-0x40, SP | |
192 | LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4 | |
193 | ||
194 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) | |
195 | LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] | |
196 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
197 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
198 | LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4 | |
199 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
200 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
201 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
202 | LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 | |
203 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
204 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
205 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
206 | LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7 | |
207 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
208 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
209 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
210 | LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 | |
211 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
212 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
213 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
214 | LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe | |
215 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
216 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
217 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
218 | LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 | |
219 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
220 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
221 | WORD $0xdf31 // xor edi,ebx | |
222 | LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa | |
223 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
224 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
225 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
226 | LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb | |
227 | ||
228 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) | |
229 | LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] | |
230 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
231 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
232 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
233 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
234 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
235 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
236 | LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb | |
237 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
238 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
239 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
240 | LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 | |
241 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
242 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
243 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
244 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
245 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
246 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
247 | WORD $0xc731 // xor edi,eax | |
248 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
249 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
250 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
251 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
252 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
253 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
254 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
255 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
256 | LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4 | |
257 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
258 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
259 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
260 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
261 | ||
262 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) | |
263 | LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] | |
264 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
265 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
266 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
267 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
268 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
269 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
270 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
271 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
272 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
273 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
274 | LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 | |
275 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
276 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
277 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
278 | LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 | |
279 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
280 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
281 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
282 | LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50 | |
283 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
284 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
285 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
286 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
287 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
288 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
289 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
290 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
291 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
292 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
293 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
294 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
295 | ||
296 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) | |
297 | LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] | |
298 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
299 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
300 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
301 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
302 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
303 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
304 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
305 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
306 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
307 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
308 | LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 | |
309 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
310 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
311 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
312 | LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 | |
313 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
314 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
315 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
316 | LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0] | |
317 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
318 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
319 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
320 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
321 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
322 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
323 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
324 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
325 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
326 | ||
327 | LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 | |
328 | LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4 | |
329 | ||
330 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) | |
331 | LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] | |
332 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
333 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
334 | LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4 | |
335 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
336 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
337 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
338 | LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 | |
339 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
340 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
341 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
342 | LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7 | |
343 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
344 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
345 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
346 | LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 | |
347 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
348 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
349 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
350 | LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe | |
351 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
352 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
353 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
354 | LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 | |
355 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
356 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
357 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
358 | LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa | |
359 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
360 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
361 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
362 | LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb | |
363 | ||
364 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) | |
365 | LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] | |
366 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
367 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
368 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
369 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
370 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
371 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
372 | LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb | |
373 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
374 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
375 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
376 | LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 | |
377 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
378 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
379 | WORD $0xd789 // mov edi,edx | |
380 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
381 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
382 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
383 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
384 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
385 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
386 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
387 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
388 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
389 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
390 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
391 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
392 | LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4 | |
393 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
394 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
395 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
396 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
397 | ||
398 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) | |
399 | LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] | |
400 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
401 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
402 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
403 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
404 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
405 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
406 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
407 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
408 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
409 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
410 | LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 | |
411 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
412 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
413 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
414 | LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 | |
415 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
416 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
417 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
418 | LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50 | |
419 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
420 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
421 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
422 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
423 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
424 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
425 | WORD $0xd731 // xor edi,edx | |
426 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
427 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
428 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
429 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
430 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
431 | ||
432 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) | |
433 | LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] | |
434 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
435 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
436 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
437 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
438 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
439 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
440 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
441 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
442 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
443 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
444 | LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 | |
445 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
446 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
447 | WORD $0xdf89 // mov edi,ebx | |
448 | LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 | |
449 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
450 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
451 | WORD $0xcf31 // xor edi,ecx | |
452 | LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20] | |
453 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
454 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
455 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
456 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
457 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
458 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
459 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
460 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
461 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
462 | ||
463 | LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 | |
464 | ||
465 | LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40] | |
466 | LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4 | |
467 | ||
468 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) | |
469 | LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] | |
470 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
471 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
472 | LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4 | |
473 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
474 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
475 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
476 | LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 | |
477 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
478 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
479 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
480 | LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7 | |
481 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
482 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
483 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
484 | LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 | |
485 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
486 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
487 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
488 | LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe | |
489 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
490 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
491 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
492 | LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 | |
493 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
494 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
495 | WORD $0xdf31 // xor edi,ebx | |
496 | LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa | |
497 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
498 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
499 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
500 | LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb | |
501 | ||
502 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) | |
503 | LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] | |
504 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
505 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
506 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
507 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
508 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
509 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
510 | LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb | |
511 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
512 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
513 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
514 | LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 | |
515 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
516 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
517 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
518 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
519 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
520 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
521 | WORD $0xc731 // xor edi,eax | |
522 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
523 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
524 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
525 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
526 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
527 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
528 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
529 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
530 | LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4 | |
531 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
532 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
533 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
534 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
535 | ||
536 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) | |
537 | LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] | |
538 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
539 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
540 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
541 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
542 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
543 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
544 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
545 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
546 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
547 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
548 | LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 | |
549 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
550 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
551 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
552 | LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 | |
553 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
554 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
555 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
556 | LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50 | |
557 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
558 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
559 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
560 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
561 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
562 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
563 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
564 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
565 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
566 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
567 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
568 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
569 | ||
570 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) | |
571 | LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] | |
572 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
573 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
574 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
575 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
576 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
577 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
578 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
579 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
580 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
581 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
582 | LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 | |
583 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
584 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
585 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
586 | LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 | |
587 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
588 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
589 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
590 | LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40] | |
591 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
592 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
593 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
594 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
595 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
596 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
597 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
598 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
599 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
600 | ||
601 | LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 | |
602 | LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4 | |
603 | ||
604 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) | |
605 | LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] | |
606 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
607 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
608 | LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4 | |
609 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
610 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
611 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
612 | LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 | |
613 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
614 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
615 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
616 | LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7 | |
617 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
618 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
619 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
620 | LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 | |
621 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
622 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
623 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
624 | LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe | |
625 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
626 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
627 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
628 | LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 | |
629 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
630 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
631 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
632 | LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa | |
633 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
634 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
635 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
636 | LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb | |
637 | ||
638 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) | |
639 | LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] | |
640 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
641 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
642 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
643 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
644 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
645 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
646 | LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb | |
647 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
648 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
649 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
650 | LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 | |
651 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
652 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
653 | WORD $0xd789 // mov edi,edx | |
654 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
655 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
656 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
657 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
658 | LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 | |
659 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
660 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
661 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
662 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
663 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
664 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
665 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
666 | LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4 | |
667 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
668 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
669 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
670 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
671 | ||
672 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) | |
673 | LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] | |
674 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
675 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
676 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
677 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
678 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
679 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
680 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
681 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
682 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
683 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
684 | LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 | |
685 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
686 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
687 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
688 | LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 | |
689 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
690 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
691 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
692 | LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50 | |
693 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
694 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
695 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
696 | LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa | |
697 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
698 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
699 | WORD $0xd731 // xor edi,edx | |
700 | LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 | |
701 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
702 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
703 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
704 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
705 | ||
706 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) | |
707 | LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] | |
708 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
709 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
710 | LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 | |
711 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
712 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
713 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
714 | LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 | |
715 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
716 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
717 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
718 | LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 | |
719 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
720 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
721 | WORD $0xdf89 // mov edi,ebx | |
722 | LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 | |
723 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
724 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
725 | WORD $0xcf31 // xor edi,ecx | |
726 | LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60] | |
727 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
728 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
729 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
730 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
731 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
732 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
733 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
734 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
735 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
736 | ||
737 | LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 | |
738 | ADDQ $0x80, BP | |
739 | ||
740 | CMPB 0x3(BP), $0x0 | |
741 | JNE loop1 | |
742 | ||
743 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40) | |
744 | LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40] | |
745 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
746 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
747 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
748 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
749 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
750 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
751 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
752 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
753 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
754 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
755 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
756 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
757 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
758 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
759 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
760 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
761 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
762 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
763 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
764 | WORD $0xdf31 // xor edi,ebx | |
765 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
766 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
767 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
768 | ||
769 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44) | |
770 | LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44] | |
771 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
772 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
773 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
774 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
775 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
776 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
777 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
778 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
779 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
780 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
781 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
782 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
783 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
784 | WORD $0xc731 // xor edi,eax | |
785 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
786 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
787 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
788 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
789 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
790 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
791 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
792 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
793 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
794 | ||
795 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48) | |
796 | LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48] | |
797 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
798 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
799 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
800 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
801 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
802 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
803 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
804 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
805 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
806 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
807 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
808 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
809 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
810 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
811 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
812 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
813 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
814 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
815 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
816 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
817 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
818 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
819 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
820 | ||
821 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c) | |
822 | LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c] | |
823 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
824 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
825 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
826 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
827 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
828 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
829 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
830 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
831 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
832 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
833 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
834 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
835 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
836 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
837 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
838 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
839 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
840 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
841 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
842 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
843 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
844 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
845 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
846 | ||
847 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60) | |
848 | LONG $0x60245403 // add edx,[rsp+0x60] | |
849 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
850 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
851 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
852 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
853 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
854 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
855 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
856 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
857 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
858 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
859 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
860 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
861 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
862 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
863 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
864 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
865 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
866 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
867 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
868 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
869 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
870 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
871 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
872 | ||
873 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64) | |
874 | LONG $0x64244c03 // add ecx,[rsp+0x64] | |
875 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
876 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
877 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
878 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
879 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
880 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
881 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
882 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
883 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
884 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
885 | WORD $0xd789 // mov edi,edx | |
886 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
887 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
888 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
889 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
890 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
891 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
892 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
893 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
894 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
895 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
896 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
897 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
898 | ||
899 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68) | |
900 | LONG $0x68245c03 // add ebx,[rsp+0x68] | |
901 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
902 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
903 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
904 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
905 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
906 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
907 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
908 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
909 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
910 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
911 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
912 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
913 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
914 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
915 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
916 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
917 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
918 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
919 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
920 | WORD $0xd731 // xor edi,edx | |
921 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
922 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
923 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
924 | ||
925 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c) | |
926 | LONG $0x6c244403 // add eax,[rsp+0x6c] | |
927 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
928 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
929 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
930 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
931 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
932 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
933 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
934 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
935 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
936 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
937 | WORD $0xdf89 // mov edi,ebx | |
938 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
939 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
940 | WORD $0xcf31 // xor edi,ecx | |
941 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
942 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
943 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
944 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
945 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
946 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
947 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
948 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
949 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
950 | ||
951 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00) | |
952 | LONG $0x241c0344 // add r11d,[rsp] | |
953 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
954 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
955 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
956 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
957 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
958 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
959 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
960 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
961 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
962 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
963 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
964 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
965 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
966 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
967 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
968 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
969 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
970 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
971 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
972 | WORD $0xdf31 // xor edi,ebx | |
973 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
974 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
975 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
976 | ||
977 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04) | |
978 | LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4] | |
979 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
980 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
981 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
982 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
983 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
984 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
985 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
986 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
987 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
988 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
989 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
990 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
991 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
992 | WORD $0xc731 // xor edi,eax | |
993 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
994 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
995 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
996 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
997 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
998 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
999 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1000 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
1001 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
1002 | ||
1003 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08) | |
1004 | LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8] | |
1005 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
1006 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
1007 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
1008 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
1009 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
1010 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
1011 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1012 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
1013 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
1014 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1015 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
1016 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
1017 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
1018 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
1019 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
1020 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
1021 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
1022 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1023 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1024 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
1025 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1026 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
1027 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
1028 | ||
1029 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c) | |
1030 | LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc] | |
1031 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
1032 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
1033 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
1034 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
1035 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
1036 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
1037 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1038 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
1039 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
1040 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1041 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
1042 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
1043 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
1044 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
1045 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
1046 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
1047 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
1048 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1049 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1050 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
1051 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1052 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
1053 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
1054 | ||
1055 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20) | |
1056 | LONG $0x20245403 // add edx,[rsp+0x20] | |
1057 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
1058 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
1059 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
1060 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
1061 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
1062 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
1063 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1064 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
1065 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
1066 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1067 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
1068 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
1069 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
1070 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
1071 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
1072 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
1073 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
1074 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1075 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1076 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
1077 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1078 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
1079 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
1080 | ||
1081 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24) | |
1082 | LONG $0x24244c03 // add ecx,[rsp+0x24] | |
1083 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
1084 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
1085 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
1086 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
1087 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
1088 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
1089 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1090 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
1091 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
1092 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1093 | WORD $0xd789 // mov edi,edx | |
1094 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
1095 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
1096 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
1097 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
1098 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
1099 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
1100 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1101 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1102 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
1103 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1104 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
1105 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
1106 | ||
1107 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28) | |
1108 | LONG $0x28245c03 // add ebx,[rsp+0x28] | |
1109 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
1110 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
1111 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
1112 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
1113 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
1114 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
1115 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1116 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
1117 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
1118 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1119 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
1120 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
1121 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
1122 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
1123 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
1124 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
1125 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
1126 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1127 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1128 | WORD $0xd731 // xor edi,edx | |
1129 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1130 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
1131 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
1132 | ||
1133 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c) | |
1134 | LONG $0x2c244403 // add eax,[rsp+0x2c] | |
1135 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
1136 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
1137 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
1138 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
1139 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
1140 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
1141 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1142 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
1143 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
1144 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1145 | WORD $0xdf89 // mov edi,ebx | |
1146 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
1147 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
1148 | WORD $0xcf31 // xor edi,ecx | |
1149 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
1150 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
1151 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
1152 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1153 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1154 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
1155 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1156 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
1157 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
1158 | ||
1159 | MOVQ 0x200(SP), DI // $_ctx | |
1160 | ADDQ R14, AX | |
1161 | ||
1162 | LEAQ 0x1c0(SP), BP | |
1163 | ||
1164 | ADDL (DI), AX | |
1165 | ADDL 4(DI), BX | |
1166 | ADDL 8(DI), CX | |
1167 | ADDL 12(DI), DX | |
1168 | ADDL 16(DI), R8 | |
1169 | ADDL 20(DI), R9 | |
1170 | ADDL 24(DI), R10 | |
1171 | ADDL 28(DI), R11 | |
1172 | ||
1173 | MOVL AX, (DI) | |
1174 | MOVL BX, 4(DI) | |
1175 | MOVL CX, 8(DI) | |
1176 | MOVL DX, 12(DI) | |
1177 | MOVL R8, 16(DI) | |
1178 | MOVL R9, 20(DI) | |
1179 | MOVL R10, 24(DI) | |
1180 | MOVL R11, 28(DI) | |
1181 | ||
1182 | CMPQ SI, 0x50(BP) // $_end | |
1183 | JE done | |
1184 | ||
1185 | XORQ R14, R14 | |
1186 | MOVQ BX, DI | |
1187 | XORQ CX, DI // magic | |
1188 | MOVQ R9, R12 | |
1189 | 1189 | |
1190 | 1190 | loop2: |
1191 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10) | |
1192 | LONG $0x105d0344 // add r11d,[rbp+0x10] | |
1193 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
1194 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
1195 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
1196 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
1197 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
1198 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
1199 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1200 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
1201 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
1202 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1203 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
1204 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
1205 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
1206 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
1207 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
1208 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
1209 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
1210 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1211 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1212 | WORD $0xdf31 // xor edi,ebx | |
1213 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1214 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
1215 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
1216 | ||
1217 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14) | |
1218 | LONG $0x14550344 // add r10d,[rbp+0x14] | |
1219 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
1220 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
1221 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
1222 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
1223 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
1224 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
1225 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1226 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
1227 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
1228 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1229 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
1230 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
1231 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
1232 | WORD $0xc731 // xor edi,eax | |
1233 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
1234 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
1235 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
1236 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1237 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1238 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
1239 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1240 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
1241 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
1242 | ||
1243 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18) | |
1244 | LONG $0x184d0344 // add r9d,[rbp+0x18] | |
1245 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
1246 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
1247 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
1248 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
1249 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
1250 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
1251 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1252 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
1253 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
1254 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1255 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
1256 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
1257 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
1258 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
1259 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
1260 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
1261 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
1262 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1263 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1264 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
1265 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1266 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
1267 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
1268 | ||
1269 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c) | |
1270 | LONG $0x1c450344 // add r8d,[rbp+0x1c] | |
1271 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
1272 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
1273 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
1274 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
1275 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
1276 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
1277 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1278 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
1279 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
1280 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1281 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
1282 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
1283 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
1284 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
1285 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
1286 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
1287 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
1288 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1289 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1290 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
1291 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1292 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
1293 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
1294 | ||
1295 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30) | |
1296 | WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30] | |
1297 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
1298 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
1299 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
1300 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
1301 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
1302 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
1303 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1304 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
1305 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
1306 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1307 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
1308 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
1309 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
1310 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
1311 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
1312 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
1313 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
1314 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1315 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1316 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
1317 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1318 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
1319 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
1320 | ||
1321 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34) | |
1322 | WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34] | |
1323 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
1324 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
1325 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
1326 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
1327 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
1328 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
1329 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1330 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
1331 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
1332 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1333 | WORD $0xd789 // mov edi,edx | |
1334 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
1335 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
1336 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
1337 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
1338 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
1339 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
1340 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1341 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1342 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
1343 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1344 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
1345 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
1346 | ||
1347 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38) | |
1348 | WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38] | |
1349 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
1350 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
1351 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
1352 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
1353 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
1354 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
1355 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1356 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
1357 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
1358 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1359 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
1360 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
1361 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
1362 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
1363 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
1364 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
1365 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
1366 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1367 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1368 | WORD $0xd731 // xor edi,edx | |
1369 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1370 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
1371 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
1372 | ||
1373 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c) | |
1374 | WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c] | |
1375 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
1376 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
1377 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
1378 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
1379 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
1380 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
1381 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1382 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
1383 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
1384 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1385 | WORD $0xdf89 // mov edi,ebx | |
1386 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
1387 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
1388 | WORD $0xcf31 // xor edi,ecx | |
1389 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
1390 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
1391 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
1392 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1393 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1394 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
1395 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1396 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
1397 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
1398 | ||
1399 | ADDQ $-0x40, BP | |
1400 | CMPQ BP, SP | |
1401 | JAE loop2 | |
1402 | ||
1403 | MOVQ 0x200(SP), DI // $_ctx | |
1404 | ADDQ R14, AX | |
1405 | ||
1406 | ADDQ $0x1c0, SP | |
1407 | ||
1408 | ADDL (DI), AX | |
1409 | ADDL 4(DI), BX | |
1410 | ADDL 8(DI), CX | |
1411 | ADDL 12(DI), DX | |
1412 | ADDL 16(DI), R8 | |
1413 | ADDL 20(DI), R9 | |
1414 | ||
1415 | ADDQ $0x80, SI // input += 2 | |
1416 | ADDL 24(DI), R10 | |
1417 | MOVQ SI, R12 | |
1418 | ADDL 28(DI), R11 | |
1419 | CMPQ SI, 0x50(SP) // input == _end | |
1420 | ||
1421 | MOVL AX, (DI) | |
1422 | LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */ | |
1423 | MOVL AX, (DI) | |
1424 | MOVL BX, 4(DI) | |
1425 | MOVL CX, 8(DI) | |
1426 | MOVL DX, 12(DI) | |
1427 | MOVL R8, 16(DI) | |
1428 | MOVL R9, 20(DI) | |
1429 | MOVL R10, 24(DI) | |
1430 | MOVL R11, 28(DI) | |
1431 | ||
1432 | JBE loop0 | |
1433 | LEAQ (SP), BP | |
1191 | // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10) | |
1192 | LONG $0x105d0344 // add r11d,[rbp+0x10] | |
1193 | WORD $0x2145; BYTE $0xc4 // and r12d,r8d | |
1194 | LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 | |
1195 | LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb | |
1196 | LONG $0x30048d42 // lea eax,[rax+r14*1] | |
1197 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
1198 | LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d | |
1199 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1200 | LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 | |
1201 | LONG $0x231c8d47 // lea r11d,[r11+r12*1] | |
1202 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1203 | WORD $0x8941; BYTE $0xc7 // mov r15d,eax | |
1204 | LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 | |
1205 | LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] | |
1206 | WORD $0x3141; BYTE $0xdf // xor r15d,ebx | |
1207 | LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd | |
1208 | LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 | |
1209 | LONG $0x1a148d42 // lea edx,[rdx+r11*1] | |
1210 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1211 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1212 | WORD $0xdf31 // xor edi,ebx | |
1213 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1214 | LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] | |
1215 | WORD $0x8945; BYTE $0xc4 // mov r12d,r8d | |
1216 | ||
1217 | // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14) | |
1218 | LONG $0x14550344 // add r10d,[rbp+0x14] | |
1219 | WORD $0x2141; BYTE $0xd4 // and r12d,edx | |
1220 | LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 | |
1221 | LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb | |
1222 | LONG $0x331c8d47 // lea r11d,[r11+r14*1] | |
1223 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
1224 | LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d | |
1225 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1226 | LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 | |
1227 | LONG $0x22148d47 // lea r10d,[r10+r12*1] | |
1228 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1229 | WORD $0x8944; BYTE $0xdf // mov edi,r11d | |
1230 | LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 | |
1231 | LONG $0x2a148d47 // lea r10d,[r10+r13*1] | |
1232 | WORD $0xc731 // xor edi,eax | |
1233 | LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd | |
1234 | LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 | |
1235 | LONG $0x110c8d42 // lea ecx,[rcx+r10*1] | |
1236 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1237 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1238 | WORD $0x3141; BYTE $0xc7 // xor r15d,eax | |
1239 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1240 | LONG $0x3a148d47 // lea r10d,[r10+r15*1] | |
1241 | WORD $0x8941; BYTE $0xd4 // mov r12d,edx | |
1242 | ||
1243 | // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18) | |
1244 | LONG $0x184d0344 // add r9d,[rbp+0x18] | |
1245 | WORD $0x2141; BYTE $0xcc // and r12d,ecx | |
1246 | LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 | |
1247 | LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb | |
1248 | LONG $0x32148d47 // lea r10d,[r10+r14*1] | |
1249 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
1250 | LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d | |
1251 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1252 | LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 | |
1253 | LONG $0x210c8d47 // lea r9d,[r9+r12*1] | |
1254 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1255 | WORD $0x8945; BYTE $0xd7 // mov r15d,r10d | |
1256 | LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 | |
1257 | LONG $0x290c8d47 // lea r9d,[r9+r13*1] | |
1258 | WORD $0x3145; BYTE $0xdf // xor r15d,r11d | |
1259 | LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd | |
1260 | LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 | |
1261 | LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] | |
1262 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1263 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1264 | WORD $0x3144; BYTE $0xdf // xor edi,r11d | |
1265 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1266 | LONG $0x390c8d45 // lea r9d,[r9+rdi*1] | |
1267 | WORD $0x8941; BYTE $0xcc // mov r12d,ecx | |
1268 | ||
1269 | // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c) | |
1270 | LONG $0x1c450344 // add r8d,[rbp+0x1c] | |
1271 | WORD $0x2141; BYTE $0xdc // and r12d,ebx | |
1272 | LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 | |
1273 | LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb | |
1274 | LONG $0x310c8d47 // lea r9d,[r9+r14*1] | |
1275 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
1276 | LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx | |
1277 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1278 | LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 | |
1279 | LONG $0x20048d47 // lea r8d,[r8+r12*1] | |
1280 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1281 | WORD $0x8944; BYTE $0xcf // mov edi,r9d | |
1282 | LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 | |
1283 | LONG $0x28048d47 // lea r8d,[r8+r13*1] | |
1284 | WORD $0x3144; BYTE $0xd7 // xor edi,r10d | |
1285 | LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd | |
1286 | LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 | |
1287 | LONG $0x00048d42 // lea eax,[rax+r8*1] | |
1288 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1289 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1290 | WORD $0x3145; BYTE $0xd7 // xor r15d,r10d | |
1291 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1292 | LONG $0x38048d47 // lea r8d,[r8+r15*1] | |
1293 | WORD $0x8941; BYTE $0xdc // mov r12d,ebx | |
1294 | ||
1295 | // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30) | |
1296 | WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30] | |
1297 | WORD $0x2141; BYTE $0xc4 // and r12d,eax | |
1298 | LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 | |
1299 | LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb | |
1300 | LONG $0x30048d47 // lea r8d,[r8+r14*1] | |
1301 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
1302 | LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx | |
1303 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1304 | LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 | |
1305 | LONG $0x22148d42 // lea edx,[rdx+r12*1] | |
1306 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1307 | WORD $0x8945; BYTE $0xc7 // mov r15d,r8d | |
1308 | LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 | |
1309 | LONG $0x2a148d42 // lea edx,[rdx+r13*1] | |
1310 | WORD $0x3145; BYTE $0xcf // xor r15d,r9d | |
1311 | LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd | |
1312 | LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 | |
1313 | LONG $0x131c8d45 // lea r11d,[r11+rdx*1] | |
1314 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1315 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1316 | WORD $0x3144; BYTE $0xcf // xor edi,r9d | |
1317 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1318 | WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] | |
1319 | WORD $0x8941; BYTE $0xc4 // mov r12d,eax | |
1320 | ||
1321 | // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34) | |
1322 | WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34] | |
1323 | WORD $0x2145; BYTE $0xdc // and r12d,r11d | |
1324 | LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 | |
1325 | LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb | |
1326 | LONG $0x32148d42 // lea edx,[rdx+r14*1] | |
1327 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
1328 | LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx | |
1329 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1330 | LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 | |
1331 | LONG $0x210c8d42 // lea ecx,[rcx+r12*1] | |
1332 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1333 | WORD $0xd789 // mov edi,edx | |
1334 | LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 | |
1335 | LONG $0x290c8d42 // lea ecx,[rcx+r13*1] | |
1336 | WORD $0x3144; BYTE $0xc7 // xor edi,r8d | |
1337 | LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd | |
1338 | LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 | |
1339 | LONG $0x0a148d45 // lea r10d,[r10+rcx*1] | |
1340 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1341 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1342 | WORD $0x3145; BYTE $0xc7 // xor r15d,r8d | |
1343 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1344 | LONG $0x390c8d42 // lea ecx,[rcx+r15*1] | |
1345 | WORD $0x8945; BYTE $0xdc // mov r12d,r11d | |
1346 | ||
1347 | // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38) | |
1348 | WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38] | |
1349 | WORD $0x2145; BYTE $0xd4 // and r12d,r10d | |
1350 | LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 | |
1351 | LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb | |
1352 | LONG $0x310c8d42 // lea ecx,[rcx+r14*1] | |
1353 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
1354 | LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax | |
1355 | WORD $0x3145; BYTE $0xfd // xor r13d,r15d | |
1356 | LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 | |
1357 | LONG $0x231c8d42 // lea ebx,[rbx+r12*1] | |
1358 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1359 | WORD $0x8941; BYTE $0xcf // mov r15d,ecx | |
1360 | LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 | |
1361 | LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] | |
1362 | WORD $0x3141; BYTE $0xd7 // xor r15d,edx | |
1363 | LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd | |
1364 | LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 | |
1365 | LONG $0x190c8d45 // lea r9d,[r9+rbx*1] | |
1366 | WORD $0x2144; BYTE $0xff // and edi,r15d | |
1367 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1368 | WORD $0xd731 // xor edi,edx | |
1369 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1370 | WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] | |
1371 | WORD $0x8945; BYTE $0xd4 // mov r12d,r10d | |
1372 | ||
1373 | // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c) | |
1374 | WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c] | |
1375 | WORD $0x2145; BYTE $0xcc // and r12d,r9d | |
1376 | LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 | |
1377 | LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb | |
1378 | LONG $0x331c8d42 // lea ebx,[rbx+r14*1] | |
1379 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
1380 | LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d | |
1381 | WORD $0x3141; BYTE $0xfd // xor r13d,edi | |
1382 | LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 | |
1383 | LONG $0x20048d42 // lea eax,[rax+r12*1] | |
1384 | WORD $0x3145; BYTE $0xf5 // xor r13d,r14d | |
1385 | WORD $0xdf89 // mov edi,ebx | |
1386 | LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 | |
1387 | LONG $0x28048d42 // lea eax,[rax+r13*1] | |
1388 | WORD $0xcf31 // xor edi,ecx | |
1389 | LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd | |
1390 | LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 | |
1391 | LONG $0x00048d45 // lea r8d,[r8+rax*1] | |
1392 | WORD $0x2141; BYTE $0xff // and r15d,edi | |
1393 | WORD $0x3145; BYTE $0xe6 // xor r14d,r12d | |
1394 | WORD $0x3141; BYTE $0xcf // xor r15d,ecx | |
1395 | WORD $0x3145; BYTE $0xee // xor r14d,r13d | |
1396 | LONG $0x38048d42 // lea eax,[rax+r15*1] | |
1397 | WORD $0x8945; BYTE $0xcc // mov r12d,r9d | |
1398 | ||
1399 | ADDQ $-0x40, BP | |
1400 | CMPQ BP, SP | |
1401 | JAE loop2 | |
1402 | ||
1403 | MOVQ 0x200(SP), DI // $_ctx | |
1404 | ADDQ R14, AX | |
1405 | ||
1406 | ADDQ $0x1c0, SP | |
1407 | ||
1408 | ADDL (DI), AX | |
1409 | ADDL 4(DI), BX | |
1410 | ADDL 8(DI), CX | |
1411 | ADDL 12(DI), DX | |
1412 | ADDL 16(DI), R8 | |
1413 | ADDL 20(DI), R9 | |
1414 | ||
1415 | ADDQ $0x80, SI // input += 2 | |
1416 | ADDL 24(DI), R10 | |
1417 | MOVQ SI, R12 | |
1418 | ADDL 28(DI), R11 | |
1419 | CMPQ SI, 0x50(SP) // input == _end | |
1420 | ||
1421 | MOVL AX, (DI) | |
1422 | LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */ | |
1423 | MOVL AX, (DI) | |
1424 | MOVL BX, 4(DI) | |
1425 | MOVL CX, 8(DI) | |
1426 | MOVL DX, 12(DI) | |
1427 | MOVL R8, 16(DI) | |
1428 | MOVL R9, 20(DI) | |
1429 | MOVL R10, 24(DI) | |
1430 | MOVL R11, 28(DI) | |
1431 | ||
1432 | JBE loop0 | |
1433 | LEAQ (SP), BP | |
1434 | 1434 | |
1435 | 1435 | done: |
1436 | MOVQ BP, SP | |
1437 | MOVQ 0x58(SP), SP | |
1438 | WORD $0xf8c5; BYTE $0x77 // vzeroupper | |
1439 | ||
1440 | RET | |
1441 | ||
1436 | MOVQ BP, SP | |
1437 | MOVQ 0x58(SP), SP | |
1438 | WORD $0xf8c5; BYTE $0x77 // vzeroupper | |
1439 | ||
1440 | RET | |
1441 |
27 | 27 | ) |
28 | 28 | |
29 | 29 | //go:noescape |
30 | func sha256_x16_avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte) | |
31 | ||
32 | // Do not start at 0 but next multiple of 16 so as to be able to | |
30 | func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte) | |
31 | ||
32 | // Avx512ServerUID - Do not start at 0 but at the next multiple of 16 so as to be able to | |
33 | 33 | // differentiate from the default initialization value of 0 |
34 | const Avx512ServerUid = 16 | |
34 | const Avx512ServerUID = 16 | |
35 | 35 | |
36 | 36 | var uidCounter uint64 |
37 | 37 | |
38 | // NewAvx512 - initialize sha256 Avx512 implementation. | |
38 | 39 | func NewAvx512(a512srv *Avx512Server) hash.Hash { |
39 | 40 | uid := atomic.AddUint64(&uidCounter, 1) |
40 | 41 | return &Avx512Digest{uid: uid, a512srv: a512srv} |
41 | 42 | } |
42 | 43 | |
43 | // Type for computing SHA256 using AVX51 | |
44 | // Avx512Digest - Type for computing SHA256 using Avx512 | |
44 | 45 | type Avx512Digest struct { |
45 | 46 | uid uint64 |
46 | 47 | a512srv *Avx512Server |
51 | 52 | result [Size]byte |
52 | 53 | } |
53 | 54 | |
54 | // Return size of checksum | |
55 | // Size - Return size of checksum | |
55 | 56 | func (d *Avx512Digest) Size() int { return Size } |
56 | 57 | |
57 | // Return blocksize of checksum | |
58 | // BlockSize - Return blocksize of checksum | |
58 | 59 | func (d Avx512Digest) BlockSize() int { return BlockSize } |
59 | 60 | |
61 | // Reset - reset sha digest to its initial values | |
60 | 62 | func (d *Avx512Digest) Reset() { |
61 | 63 | d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true} |
62 | 64 | d.nx = 0 |
68 | 70 | func (d *Avx512Digest) Write(p []byte) (nn int, err error) { |
69 | 71 | |
70 | 72 | if d.final { |
71 | return 0, errors.New("Avx512Digest already finalized. Reset first before writing again.") | |
73 | return 0, errors.New("Avx512Digest already finalized. Reset first before writing again") | |
72 | 74 | } |
73 | 75 | |
74 | 76 | nn = len(p) |
93 | 95 | return |
94 | 96 | } |
95 | 97 | |
96 | // Return sha256 sum in bytes | |
98 | // Sum - Return sha256 sum in bytes | |
97 | 99 | func (d *Avx512Digest) Sum(in []byte) (result []byte) { |
98 | 100 | |
99 | 101 | if d.final { |
261 | 263 | func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte { |
262 | 264 | |
263 | 265 | scratch := [512]byte{} |
264 | sha256_x16_avx512(digests, &scratch, &table, mask, input) | |
266 | sha256X16Avx512(digests, &scratch, &table, mask, input) | |
265 | 267 | |
266 | 268 | output := [16][Size]byte{} |
267 | 269 | for i := 0; i < 16; i++ { |
289 | 291 | sumCh chan [Size]byte |
290 | 292 | } |
291 | 293 | |
292 | // Type to implement 16x parallel handling of SHA256 invocations | |
294 | // Avx512Server - Type to implement 16x parallel handling of SHA256 invocations | |
293 | 295 | type Avx512Server struct { |
294 | 296 | blocksCh chan blockInput // Input channel |
295 | 297 | totalIn int // Total number of inputs waiting to be processed |
297 | 299 | digests map[uint64][Size]byte // Map of uids to (interim) digest results |
298 | 300 | } |
299 | 301 | |
300 | // Info for each lane | |
302 | // Avx512LaneInfo - Info for each lane | |
301 | 303 | type Avx512LaneInfo struct { |
302 | 304 | uid uint64 // unique identification for this SHA processing |
303 | 305 | block []byte // input block to be processed |
304 | 306 | outputCh chan [Size]byte // channel for output result |
305 | 307 | } |
306 | 308 | |
307 | // Create new object for parallel processing handling | |
309 | // NewAvx512Server - Create new object for parallel processing handling | |
308 | 310 | func NewAvx512Server() *Avx512Server { |
309 | 311 | a512srv := &Avx512Server{} |
310 | 312 | a512srv.digests = make(map[uint64][Size]byte) |
315 | 317 | return a512srv |
316 | 318 | } |
317 | 319 | |
318 | // Sole handler for reading from the input channel | |
320 | // Process - Sole handler for reading from the input channel | |
319 | 321 | func (a512srv *Avx512Server) Process() { |
320 | 322 | for { |
321 | 323 | select { |
362 | 364 | if lane.uid == uid { |
363 | 365 | if lane.block != nil { |
364 | 366 | a512srv.lanes[i] = Avx512LaneInfo{} // clear message |
365 | a512srv.totalIn -= 1 | |
367 | a512srv.totalIn-- | |
366 | 368 | } |
367 | 369 | } |
368 | 370 | } |
402 | 404 | return len(p), nil |
403 | 405 | } |
404 | 406 | |
407 | // Sum - return sha256 sum in bytes for a given sum id. | |
405 | 408 | func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte { |
406 | 409 | sumCh := make(chan [32]byte) |
407 | 410 | a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh} |
0 | TEXT ·sha256_x16_avx512(SB), 7, $0 | |
1 | MOVQ digests+0(FP), DI | |
2 | MOVQ scratch+8(FP), R12 | |
3 | MOVQ mask_len+32(FP), SI | |
4 | MOVQ r14+24(FP), R13 | |
5 | MOVQ (R13), R14 | |
6 | LONG $0x92fbc1c4; BYTE $0xce | |
7 | LEAQ inputs+48(FP), AX | |
8 | QUAD $0xf162076f487ef162; QUAD $0x7ef162014f6f487e; QUAD $0x487ef16202576f48; QUAD $0x6f487ef162035f6f; QUAD $0x6f6f487ef1620467; QUAD $0x06776f487ef16205; LONG $0x487ef162; WORD $0x7f6f; BYTE $0x07 | |
9 | MOVQ table+16(FP), DX | |
10 | WORD $0x3148; BYTE $0xc9 | |
0 | TEXT ·sha256X16Avx512(SB), 7, $0 | |
1 | MOVQ digests+0(FP), DI | |
2 | MOVQ scratch+8(FP), R12 | |
3 | MOVQ mask_len+32(FP), SI | |
4 | MOVQ r14+24(FP), R13 | |
5 | MOVQ (R13), R14 | |
6 | LONG $0x92fbc1c4; BYTE $0xce | |
7 | LEAQ inputs+48(FP), AX | |
8 | QUAD $0xf162076f487ef162; QUAD $0x7ef162014f6f487e; QUAD $0x487ef16202576f48; QUAD $0x6f487ef162035f6f; QUAD $0x6f6f487ef1620467; QUAD $0x06776f487ef16205; LONG $0x487ef162; WORD $0x7f6f; BYTE $0x07 | |
9 | MOVQ table+16(FP), DX | |
10 | WORD $0x3148; BYTE $0xc9 | |
11 | 11 | TESTQ $(1<<0), R14 |
12 | JE skipInput0 | |
13 | MOVQ 0*24(AX), R9 | |
14 | LONG $0x487cc162; WORD $0x0410; BYTE $0x09 | |
12 | JE skipInput0 | |
13 | MOVQ 0*24(AX), R9 | |
14 | LONG $0x487cc162; WORD $0x0410; BYTE $0x09 | |
15 | ||
15 | 16 | skipInput0: |
16 | 17 | TESTQ $(1<<1), R14 |
17 | JE skipInput1 | |
18 | MOVQ 1*24(AX), R9 | |
19 | LONG $0x487cc162; WORD $0x0c10; BYTE $0x09 | |
18 | JE skipInput1 | |
19 | MOVQ 1*24(AX), R9 | |
20 | LONG $0x487cc162; WORD $0x0c10; BYTE $0x09 | |
21 | ||
20 | 22 | skipInput1: |
21 | 23 | TESTQ $(1<<2), R14 |
22 | JE skipInput2 | |
23 | MOVQ 2*24(AX), R9 | |
24 | LONG $0x487cc162; WORD $0x1410; BYTE $0x09 | |
24 | JE skipInput2 | |
25 | MOVQ 2*24(AX), R9 | |
26 | LONG $0x487cc162; WORD $0x1410; BYTE $0x09 | |
27 | ||
25 | 28 | skipInput2: |
26 | 29 | TESTQ $(1<<3), R14 |
27 | JE skipInput3 | |
28 | MOVQ 3*24(AX), R9 | |
29 | LONG $0x487cc162; WORD $0x1c10; BYTE $0x09 | |
30 | JE skipInput3 | |
31 | MOVQ 3*24(AX), R9 | |
32 | LONG $0x487cc162; WORD $0x1c10; BYTE $0x09 | |
33 | ||
30 | 34 | skipInput3: |
31 | 35 | TESTQ $(1<<4), R14 |
32 | JE skipInput4 | |
33 | MOVQ 4*24(AX), R9 | |
34 | LONG $0x487cc162; WORD $0x2410; BYTE $0x09 | |
36 | JE skipInput4 | |
37 | MOVQ 4*24(AX), R9 | |
38 | LONG $0x487cc162; WORD $0x2410; BYTE $0x09 | |
39 | ||
35 | 40 | skipInput4: |
36 | 41 | TESTQ $(1<<5), R14 |
37 | JE skipInput5 | |
38 | MOVQ 5*24(AX), R9 | |
39 | LONG $0x487cc162; WORD $0x2c10; BYTE $0x09 | |
42 | JE skipInput5 | |
43 | MOVQ 5*24(AX), R9 | |
44 | LONG $0x487cc162; WORD $0x2c10; BYTE $0x09 | |
45 | ||
40 | 46 | skipInput5: |
41 | 47 | TESTQ $(1<<6), R14 |
42 | JE skipInput6 | |
43 | MOVQ 6*24(AX), R9 | |
44 | LONG $0x487cc162; WORD $0x3410; BYTE $0x09 | |
48 | JE skipInput6 | |
49 | MOVQ 6*24(AX), R9 | |
50 | LONG $0x487cc162; WORD $0x3410; BYTE $0x09 | |
51 | ||
45 | 52 | skipInput6: |
46 | 53 | TESTQ $(1<<7), R14 |
47 | JE skipInput7 | |
48 | MOVQ 7*24(AX), R9 | |
49 | LONG $0x487cc162; WORD $0x3c10; BYTE $0x09 | |
54 | JE skipInput7 | |
55 | MOVQ 7*24(AX), R9 | |
56 | LONG $0x487cc162; WORD $0x3c10; BYTE $0x09 | |
57 | ||
50 | 58 | skipInput7: |
51 | 59 | TESTQ $(1<<8), R14 |
52 | JE skipInput8 | |
53 | MOVQ 8*24(AX), R9 | |
54 | LONG $0x487c4162; WORD $0x0410; BYTE $0x09 | |
60 | JE skipInput8 | |
61 | MOVQ 8*24(AX), R9 | |
62 | LONG $0x487c4162; WORD $0x0410; BYTE $0x09 | |
63 | ||
55 | 64 | skipInput8: |
56 | 65 | TESTQ $(1<<9), R14 |
57 | JE skipInput9 | |
58 | MOVQ 9*24(AX), R9 | |
59 | LONG $0x487c4162; WORD $0x0c10; BYTE $0x09 | |
66 | JE skipInput9 | |
67 | MOVQ 9*24(AX), R9 | |
68 | LONG $0x487c4162; WORD $0x0c10; BYTE $0x09 | |
69 | ||
60 | 70 | skipInput9: |
61 | 71 | TESTQ $(1<<10), R14 |
62 | JE skipInput10 | |
63 | MOVQ 10*24(AX), R9 | |
64 | LONG $0x487c4162; WORD $0x1410; BYTE $0x09 | |
72 | JE skipInput10 | |
73 | MOVQ 10*24(AX), R9 | |
74 | LONG $0x487c4162; WORD $0x1410; BYTE $0x09 | |
75 | ||
65 | 76 | skipInput10: |
66 | 77 | TESTQ $(1<<11), R14 |
67 | JE skipInput11 | |
68 | MOVQ 11*24(AX), R9 | |
69 | LONG $0x487c4162; WORD $0x1c10; BYTE $0x09 | |
78 | JE skipInput11 | |
79 | MOVQ 11*24(AX), R9 | |
80 | LONG $0x487c4162; WORD $0x1c10; BYTE $0x09 | |
81 | ||
70 | 82 | skipInput11: |
71 | 83 | TESTQ $(1<<12), R14 |
72 | JE skipInput12 | |
73 | MOVQ 12*24(AX), R9 | |
74 | LONG $0x487c4162; WORD $0x2410; BYTE $0x09 | |
84 | JE skipInput12 | |
85 | MOVQ 12*24(AX), R9 | |
86 | LONG $0x487c4162; WORD $0x2410; BYTE $0x09 | |
87 | ||
75 | 88 | skipInput12: |
76 | 89 | TESTQ $(1<<13), R14 |
77 | JE skipInput13 | |
78 | MOVQ 13*24(AX), R9 | |
79 | LONG $0x487c4162; WORD $0x2c10; BYTE $0x09 | |
90 | JE skipInput13 | |
91 | MOVQ 13*24(AX), R9 | |
92 | LONG $0x487c4162; WORD $0x2c10; BYTE $0x09 | |
93 | ||
80 | 94 | skipInput13: |
81 | 95 | TESTQ $(1<<14), R14 |
82 | JE skipInput14 | |
83 | MOVQ 14*24(AX), R9 | |
84 | LONG $0x487c4162; WORD $0x3410; BYTE $0x09 | |
96 | JE skipInput14 | |
97 | MOVQ 14*24(AX), R9 | |
98 | LONG $0x487c4162; WORD $0x3410; BYTE $0x09 | |
99 | ||
85 | 100 | skipInput14: |
86 | 101 | TESTQ $(1<<15), R14 |
87 | JE skipInput15 | |
88 | MOVQ 15*24(AX), R9 | |
89 | LONG $0x487c4162; WORD $0x3c10; BYTE $0x09 | |
102 | JE skipInput15 | |
103 | MOVQ 15*24(AX), R9 | |
104 | LONG $0x487c4162; WORD $0x3c10; BYTE $0x09 | |
105 | ||
90 | 106 | skipInput15: |
91 | 107 | lloop: |
92 | LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), DX | |
93 | LONG $0x487e7162; WORD $0x1a6f | |
94 | MOVQ table+16(FP), DX | |
95 | QUAD $0xd162226f487e7162; QUAD $0x7ed16224047f487e; QUAD $0x7ed16201244c7f48; QUAD $0x7ed1620224547f48; QUAD $0x7ed16203245c7f48; QUAD $0x7ed1620424647f48; QUAD $0x7ed16205246c7f48; QUAD $0x7ed1620624747f48; QUAD $0xc1834807247c7f48; QUAD $0x44c9c6407c316240; QUAD $0x62eec1c6407ca162; QUAD $0xa16244d3c6406c31; QUAD $0x34c162eed3c6406c; QUAD $0x407ca162dddac648; QUAD $0xc6407ca16288cac6; QUAD $0xcac648345162ddc2; QUAD $0x44d5c6405ca16288; QUAD $0x62eee5c6405ca162; QUAD $0xa16244d7c6404c31; QUAD $0x6cc162eef7c6404c; QUAD $0x405ca162ddfac640; QUAD $0xc6405ca16288eec6; QUAD $0xd2c6406cc162dde6; QUAD $0x44f1c6403c816288; QUAD $0x62eec1c6403c0162; QUAD $0x016244d3c6402c11; QUAD $0x4c4162eed3c6402c; QUAD $0x403c0162dddac640; QUAD $0xc6403c016288cac6; QUAD $0xf2c6404cc162ddc2; QUAD $0x44d5c6401c016288; QUAD $0x62eee5c6401c0162; QUAD $0x016244d7c6400c11; QUAD $0x2c4162eef7c6400c; QUAD $0x401c0162ddfac640; QUAD $0xc6401c016288eec6; QUAD $0xd2c6402c4162dde6; BYTE $0x88 | |
96 | LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX | |
97 | LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8 | |
98 | QUAD $0x2262336f487e6162; QUAD $0x487e5162f27648b5; QUAD $0xd27648b53262106f; QUAD $0xa262136f487ee162; QUAD $0x487e5162d77640e5; QUAD $0xcf7640e53262086f; QUAD $0xa2621b6f487ee162; QUAD $0x487ec162dd7640f5; QUAD $0xfd7640f5a262386f; QUAD $0xa2620b6f487ee162; QUAD $0x487ec162cc7640fd; QUAD $0xec7640fda262286f; QUAD $0x8262036f487ee162; QUAD $0x487ec162c27640cd; QUAD $0xe27640cd8262206f; QUAD $0x8262336f487ee162; QUAD $0x487e4162f77640a5; QUAD $0xd77640a50262106f; QUAD $0x02621b6f487e6162; QUAD $0x487e4162dd7640b5; QUAD $0xfd7640b50262386f; QUAD $0x02620b6f487e6162; QUAD $0x487e4162cc7640bd; QUAD $0xec7640bd0262286f; QUAD $0x62eec023408d2362; QUAD $0x236244c023408da3; QUAD $0xada362eee42348ad; QUAD $0x40c5036244e42348; QUAD $0x2340c51362eef723; QUAD $0xfd2340d5036244d7; QUAD $0x44fd2340d58362ee; QUAD $0x62eeea2348b50362; QUAD $0x036244ea2348b583; QUAD $0xe51362eed32340e5; QUAD $0x40f5036244cb2340; QUAD $0x2340f58362eed923; QUAD $0xce2340ed236244d9; QUAD $0x44ce2340eda362ee; QUAD $0xc162d16f487ec162; QUAD $0x407dc262f26f487e; QUAD $0xcb004075c262c300; QUAD $0xc262d300406dc262; QUAD $0x405dc262db004065; QUAD $0xeb004055c262e300; QUAD $0xc262f300404dc262; QUAD $0x403d4262fb004045; QUAD $0xcb0040354262c300; QUAD $0x4262d300402d4262; QUAD $0x401d4262db004025; QUAD $0xeb0040154262e300; QUAD $0x4262f300400d4262; QUAD $0x48455162fb004005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6201626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD 
$0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916202626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16203; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16204626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; 
QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16205626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x06626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16207626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD 
$0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1620862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6209626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1620a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; 
QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591620b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91620c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591620d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD 
$0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x0e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591620f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591621062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; 
QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6211626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916212626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16213; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD 
$0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16214626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16215626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x16626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; 
QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16217626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1621862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6219626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD 
$0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1621a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591621b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91621c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; 
QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591621d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x1e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591621f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD 
$0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591622062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6221626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916222626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; 
QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16223; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16224626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16225626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD 
$0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x26626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16227626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1622862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; 
QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6229626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1622a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591622b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD 
$0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91622c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591622d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x2e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; 
QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591622f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591623062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x01ee8348fdfe4005 | |
99 | JE lastLoop | |
100 | ADDQ $8, R13 | |
101 | MOVQ (R13), R14 | |
102 | QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x31 | |
103 | TESTQ $(1<<0), R14 | |
104 | JE skipNext0 | |
105 | MOVQ 0*24(AX), R9 | |
106 | LONG $0x487cc162; WORD $0x0410; BYTE $0x09 | |
108 | LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), DX | |
109 | LONG $0x487e7162; WORD $0x1a6f | |
110 | MOVQ table+16(FP), DX | |
111 | QUAD $0xd162226f487e7162; QUAD $0x7ed16224047f487e; QUAD $0x7ed16201244c7f48; QUAD $0x7ed1620224547f48; QUAD $0x7ed16203245c7f48; QUAD $0x7ed1620424647f48; QUAD $0x7ed16205246c7f48; QUAD $0x7ed1620624747f48; QUAD $0xc1834807247c7f48; QUAD $0x44c9c6407c316240; QUAD $0x62eec1c6407ca162; QUAD $0xa16244d3c6406c31; QUAD $0x34c162eed3c6406c; QUAD $0x407ca162dddac648; QUAD $0xc6407ca16288cac6; QUAD $0xcac648345162ddc2; QUAD $0x44d5c6405ca16288; QUAD $0x62eee5c6405ca162; QUAD $0xa16244d7c6404c31; QUAD $0x6cc162eef7c6404c; QUAD $0x405ca162ddfac640; QUAD $0xc6405ca16288eec6; QUAD $0xd2c6406cc162dde6; QUAD $0x44f1c6403c816288; QUAD $0x62eec1c6403c0162; QUAD $0x016244d3c6402c11; QUAD $0x4c4162eed3c6402c; QUAD $0x403c0162dddac640; QUAD $0xc6403c016288cac6; QUAD $0xf2c6404cc162ddc2; QUAD $0x44d5c6401c016288; QUAD $0x62eee5c6401c0162; QUAD $0x016244d7c6400c11; QUAD $0x2c4162eef7c6400c; QUAD $0x401c0162ddfac640; QUAD $0xc6401c016288eec6; QUAD $0xd2c6402c4162dde6; BYTE $0x88 | |
112 | LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX | |
113 | LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8 | |
114 | QUAD $0x2262336f487e6162; QUAD $0x487e5162f27648b5; QUAD $0xd27648b53262106f; QUAD $0xa262136f487ee162; QUAD $0x487e5162d77640e5; QUAD $0xcf7640e53262086f; QUAD $0xa2621b6f487ee162; QUAD $0x487ec162dd7640f5; QUAD $0xfd7640f5a262386f; QUAD $0xa2620b6f487ee162; QUAD $0x487ec162cc7640fd; QUAD $0xec7640fda262286f; QUAD $0x8262036f487ee162; QUAD $0x487ec162c27640cd; QUAD $0xe27640cd8262206f; QUAD $0x8262336f487ee162; QUAD $0x487e4162f77640a5; QUAD $0xd77640a50262106f; QUAD $0x02621b6f487e6162; QUAD $0x487e4162dd7640b5; QUAD $0xfd7640b50262386f; QUAD $0x02620b6f487e6162; QUAD $0x487e4162cc7640bd; QUAD $0xec7640bd0262286f; QUAD $0x62eec023408d2362; QUAD $0x236244c023408da3; QUAD $0xada362eee42348ad; QUAD $0x40c5036244e42348; QUAD $0x2340c51362eef723; QUAD $0xfd2340d5036244d7; QUAD $0x44fd2340d58362ee; QUAD $0x62eeea2348b50362; QUAD $0x036244ea2348b583; QUAD $0xe51362eed32340e5; QUAD $0x40f5036244cb2340; QUAD $0x2340f58362eed923; QUAD $0xce2340ed236244d9; QUAD $0x44ce2340eda362ee; QUAD $0xc162d16f487ec162; QUAD $0x407dc262f26f487e; QUAD $0xcb004075c262c300; QUAD $0xc262d300406dc262; QUAD $0x405dc262db004065; QUAD $0xeb004055c262e300; QUAD $0xc262f300404dc262; QUAD $0x403d4262fb004045; QUAD $0xcb0040354262c300; QUAD $0x4262d300402d4262; QUAD $0x401d4262db004025; QUAD $0xeb0040154262e300; QUAD $0x4262f300400d4262; QUAD $0x48455162fb004005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6201626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD 
$0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916202626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16203; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16204626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; 
QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16205626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x06626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16207626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD 
$0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1620862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6209626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1620a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; 
QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591620b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91620c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591620d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD 
$0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x0e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591620f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591621062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; 
QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6211626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916212626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16213; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD 
$0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16214626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16215626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x16626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; 
QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16217626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1621862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6219626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD 
$0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1621a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591621b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91621c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; 
QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591621d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x1e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591621f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD 
$0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591622062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6221626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916222626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; 
QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16223; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16224626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16225626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD 
$0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x26626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16227626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1622862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; 
QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6229626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1622a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591622b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD 
$0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91622c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591622d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x2e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; 
QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591622f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591623062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x01ee8348fdfe4005 | |
115 | JE lastLoop | |
116 | ADDQ $8, R13 | |
117 | MOVQ (R13), R14 | |
118 | QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x31 | |
119 | TESTQ $(1<<0), R14 | |
120 | JE skipNext0 | |
121 | MOVQ 0*24(AX), R9 | |
122 | LONG $0x487cc162; WORD $0x0410; BYTE $0x09 | |
123 | ||
107 | 124 | skipNext0: |
108 | QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x32 | |
125 | QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x32 | |
109 | 126 | TESTQ $(1<<1), R14 |
110 | JE skipNext1 | |
111 | MOVQ 1*24(AX), R9 | |
112 | LONG $0x487cc162; WORD $0x0c10; BYTE $0x09 | |
127 | JE skipNext1 | |
128 | MOVQ 1*24(AX), R9 | |
129 | LONG $0x487cc162; WORD $0x0c10; BYTE $0x09 | |
130 | ||
113 | 131 | skipNext1: |
114 | QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x33 | |
132 | QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x33 | |
115 | 133 | TESTQ $(1<<2), R14 |
116 | JE skipNext2 | |
117 | MOVQ 2*24(AX), R9 | |
118 | LONG $0x487cc162; WORD $0x1410; BYTE $0x09 | |
134 | JE skipNext2 | |
135 | MOVQ 2*24(AX), R9 | |
136 | LONG $0x487cc162; WORD $0x1410; BYTE $0x09 | |
137 | ||
119 | 138 | skipNext2: |
120 | QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x34 | |
139 | QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x34 | |
121 | 140 | TESTQ $(1<<3), R14 |
122 | JE skipNext3 | |
123 | MOVQ 3*24(AX), R9 | |
124 | LONG $0x487cc162; WORD $0x1c10; BYTE $0x09 | |
141 | JE skipNext3 | |
142 | MOVQ 3*24(AX), R9 | |
143 | LONG $0x487cc162; WORD $0x1c10; BYTE $0x09 | |
144 | ||
125 | 145 | skipNext3: |
126 | QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x35 | |
146 | QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x35 | |
127 | 147 | TESTQ $(1<<4), R14 |
128 | JE skipNext4 | |
129 | MOVQ 4*24(AX), R9 | |
130 | LONG $0x487cc162; WORD $0x2410; BYTE $0x09 | |
148 | JE skipNext4 | |
149 | MOVQ 4*24(AX), R9 | |
150 | LONG $0x487cc162; WORD $0x2410; BYTE $0x09 | |
151 | ||
131 | 152 | skipNext4: |
132 | QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x36 | |
153 | QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x36 | |
133 | 154 | TESTQ $(1<<5), R14 |
134 | JE skipNext5 | |
135 | MOVQ 5*24(AX), R9 | |
136 | LONG $0x487cc162; WORD $0x2c10; BYTE $0x09 | |
155 | JE skipNext5 | |
156 | MOVQ 5*24(AX), R9 | |
157 | LONG $0x487cc162; WORD $0x2c10; BYTE $0x09 | |
158 | ||
137 | 159 | skipNext5: |
138 | QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x37 | |
160 | QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x37 | |
139 | 161 | TESTQ $(1<<6), R14 |
140 | JE skipNext6 | |
141 | MOVQ 6*24(AX), R9 | |
142 | LONG $0x487cc162; WORD $0x3410; BYTE $0x09 | |
162 | JE skipNext6 | |
163 | MOVQ 6*24(AX), R9 | |
164 | LONG $0x487cc162; WORD $0x3410; BYTE $0x09 | |
165 | ||
143 | 166 | skipNext6: |
144 | QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x38 | |
167 | QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x38 | |
145 | 168 | TESTQ $(1<<7), R14 |
146 | JE skipNext7 | |
147 | MOVQ 7*24(AX), R9 | |
148 | LONG $0x487cc162; WORD $0x3c10; BYTE $0x09 | |
169 | JE skipNext7 | |
170 | MOVQ 7*24(AX), R9 | |
171 | LONG $0x487cc162; WORD $0x3c10; BYTE $0x09 | |
172 | ||
149 | 173 | skipNext7: |
150 | QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x39 | |
174 | QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x39 | |
151 | 175 | TESTQ $(1<<8), R14 |
152 | JE skipNext8 | |
153 | MOVQ 8*24(AX), R9 | |
154 | LONG $0x487c4162; WORD $0x0410; BYTE $0x09 | |
176 | JE skipNext8 | |
177 | MOVQ 8*24(AX), R9 | |
178 | LONG $0x487c4162; WORD $0x0410; BYTE $0x09 | |
179 | ||
155 | 180 | skipNext8: |
156 | QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x3a | |
181 | QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x3a | |
157 | 182 | TESTQ $(1<<9), R14 |
158 | JE skipNext9 | |
159 | MOVQ 9*24(AX), R9 | |
160 | LONG $0x487c4162; WORD $0x0c10; BYTE $0x09 | |
183 | JE skipNext9 | |
184 | MOVQ 9*24(AX), R9 | |
185 | LONG $0x487c4162; WORD $0x0c10; BYTE $0x09 | |
186 | ||
161 | 187 | skipNext9: |
162 | QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x3b | |
188 | QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x3b | |
163 | 189 | TESTQ $(1<<10), R14 |
164 | JE skipNext10 | |
165 | MOVQ 10*24(AX), R9 | |
166 | LONG $0x487c4162; WORD $0x1410; BYTE $0x09 | |
190 | JE skipNext10 | |
191 | MOVQ 10*24(AX), R9 | |
192 | LONG $0x487c4162; WORD $0x1410; BYTE $0x09 | |
193 | ||
167 | 194 | skipNext10: |
168 | QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x3c | |
195 | QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x3c | |
169 | 196 | TESTQ $(1<<11), R14 |
170 | JE skipNext11 | |
171 | MOVQ 11*24(AX), R9 | |
172 | LONG $0x487c4162; WORD $0x1c10; BYTE $0x09 | |
197 | JE skipNext11 | |
198 | MOVQ 11*24(AX), R9 | |
199 | LONG $0x487c4162; WORD $0x1c10; BYTE $0x09 | |
200 | ||
173 | 201 | skipNext11: |
174 | QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x3d | |
202 | QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x3d | |
175 | 203 | TESTQ $(1<<12), R14 |
176 | JE skipNext12 | |
177 | MOVQ 12*24(AX), R9 | |
178 | LONG $0x487c4162; WORD $0x2410; BYTE $0x09 | |
204 | JE skipNext12 | |
205 | MOVQ 12*24(AX), R9 | |
206 | LONG $0x487c4162; WORD $0x2410; BYTE $0x09 | |
207 | ||
179 | 208 | skipNext12: |
180 | QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x3e | |
209 | QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x3e | |
181 | 210 | TESTQ $(1<<13), R14 |
182 | JE skipNext13 | |
183 | MOVQ 13*24(AX), R9 | |
184 | LONG $0x487c4162; WORD $0x2c10; BYTE $0x09 | |
211 | JE skipNext13 | |
212 | MOVQ 13*24(AX), R9 | |
213 | LONG $0x487c4162; WORD $0x2c10; BYTE $0x09 | |
214 | ||
185 | 215 | skipNext13: |
186 | QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x3f | |
216 | QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x3f | |
187 | 217 | TESTQ $(1<<14), R14 |
188 | JE skipNext14 | |
189 | MOVQ 14*24(AX), R9 | |
190 | LONG $0x487c4162; WORD $0x3410; BYTE $0x09 | |
218 | JE skipNext14 | |
219 | MOVQ 14*24(AX), R9 | |
220 | LONG $0x487c4162; WORD $0x3410; BYTE $0x09 | |
221 | ||
191 | 222 | skipNext14: |
192 | QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x40 | |
223 | QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x40 | |
193 | 224 | TESTQ $(1<<15), R14 |
194 | JE skipNext15 | |
195 | MOVQ 15*24(AX), R9 | |
196 | LONG $0x487c4162; WORD $0x3c10; BYTE $0x09 | |
225 | JE skipNext15 | |
226 | MOVQ 15*24(AX), R9 | |
227 | LONG $0x487c4162; WORD $0x3c10; BYTE $0x09 | |
228 | ||
197 | 229 | skipNext15: |
198 | 230 | QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0xc4fbfe4945d16207; LONG $0xce92fbc1 |
199 | JMP lloop | |
231 | JMP lloop | |
232 | ||
200 | 233 | lastLoop: |
201 | QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516231626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d3162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x516232626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d516233; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x4865516234626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d3162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD 
$0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x6235626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623662; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d516237626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d3162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x38626f487e7162c0; QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516239626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d1162cacd25; 
QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x51623a626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d51623b; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x486551623c626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d1162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x623d626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD 
$0xc4fe487551623e62; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d51623f626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d1162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x40626f487e7162c0; QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0x62fbfe4945d16207; QUAD $0x7ef162077f487ef1; QUAD $0x487ef162014f7f48; QUAD $0x7f487ef16202577f; QUAD $0x677f487ef162035f; QUAD $0x056f7f487ef16204; QUAD $0x6206777f487ef162; LONG $0x7f487ef1; WORD $0x077f | |
234 | QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516231626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d3162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x516232626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d516233; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x4865516234626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d3162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD 
$0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x6235626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623662; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d516237626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d3162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x38626f487e7162c0; QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516239626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d1162cacd25; 
QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x51623a626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d51623b; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x486551623c626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d1162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x623d626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD 
$0xc4fe487551623e62; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d51623f626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d1162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x40626f487e7162c0; QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0x62fbfe4945d16207; QUAD $0x7ef162077f487ef1; QUAD $0x487ef162014f7f48; QUAD $0x7f487ef16202577f; QUAD $0x677f487ef162035f; QUAD $0x056f7f487ef16204; QUAD $0x6206777f487ef162; LONG $0x7f487ef1; WORD $0x077f | |
202 | 235 | VZEROUPPER |
203 | 236 | RET |
237 | ||
204 | 238 | DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x000(SB)/8, $0x0405060700010203 |
205 | 239 | DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x008(SB)/8, $0x0c0d0e0f08090a0b |
206 | 240 | DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x010(SB)/8, $0x0405060700010203 |
209 | 209 | for i := 0; i < 16; i++ { |
210 | 210 | input := make([]byte, 64) |
211 | 211 | copy(input, golden[offset+i].in) |
212 | server.Write(uint64(Avx512ServerUid+i), input) | |
212 | server.Write(uint64(Avx512ServerUID+i), input) | |
213 | 213 | } |
214 | 214 | |
215 | 215 | // Second block of 64 bytes |
216 | 216 | for i := 0; i < 16; i++ { |
217 | 217 | input := make([]byte, 64) |
218 | 218 | copy(input, golden[offset+i].in[64:]) |
219 | server.Write(uint64(Avx512ServerUid+i), input) | |
219 | server.Write(uint64(Avx512ServerUID+i), input) | |
220 | 220 | } |
221 | 221 | |
222 | 222 | wg := sync.WaitGroup{} |
240 | 240 | t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[:]), hex.EncodeToString(golden[offset+i].out[:])) |
241 | 241 | } |
242 | 242 | wg.Done() |
243 | }(i, uint64(Avx512ServerUid+i), input) | |
243 | }(i, uint64(Avx512ServerUID+i), input) | |
244 | 244 | } |
245 | 245 | |
246 | 246 | wg.Wait() |
34 | 34 | #include "textflag.h" |
35 | 35 | |
// ROTATE_XS rotates the four vector registers X4..X7 by one position:
// X4 <- X5, X5 <- X6, X6 <- X7, X7 <- previous X4.
// X15 is used as the temporary holding the old X4 and is clobbered.
36 | 36 | #define ROTATE_XS \
37 | MOVOU X4, X15 \ | |
38 | MOVOU X5, X4 \ | |
39 | MOVOU X6, X5 \ | |
40 | MOVOU X7, X6 \ | |
41 | MOVOU X15, X7 | |
37 | MOVOU X4, X15 \ | |
38 | MOVOU X5, X4 \ | |
39 | MOVOU X6, X5 \ | |
40 | MOVOU X7, X6 \ | |
41 | MOVOU X15, X7 | |
42 | 42 | |
43 | 43 | // compute s0 four at a time and s1 two at a time |
44 | 44 | // compute W[-16] + W[-7] 4 at a time |
//
// FOUR_ROUNDS_AND_SCHED performs four consecutive SHA-256 rounds on the
// 32-bit working variables a..h, interleaved with vector instructions that
// compute the next four message-schedule words.
//
// Scalar part: each round reads its pre-added k+w value from the stack
// slots _xfer+48(FP), _xfer+52(FP), _xfer+56(FP) and _xfer+60(FP) and uses
// R13, R14 and R15 (y0, y1, y2 in the comments) as scratch. Instead of
// moving data between registers, the roles of the arguments rotate by one
// position after every round ("ROTATE_ARGS"): round 1 updates h and d,
// round 2 updates g and c, round 3 updates f and b, round 4 updates e and a.
// The per-line comments after each ROTATE_ARGS marker keep using the
// logical names (a, e, ...) of the rotated roles, not the macro arguments.
//
// Vector part: the AVX instructions are hand-encoded as LONG/WORD/BYTE
// data; the mnemonic each encoding stands for is given in the trailing
// comment. Per those comments, X4..X7 hold the previous schedule words,
// X0-X3, X8 and X11 are temporaries, X10 and X12 are the shuffle masks
// used by VPSHUFB, and the new four words are written to X4 before
// ROTATE_XS rotates X4..X7.
45 | 45 | #define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h) \
46 | MOVL e, R13 \ /* y0 = e */ | |
47 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
48 | MOVL a, R14 \ /* y1 = a */ | |
49 | LONG $0x0f41e3c4; WORD $0x04c6 \ // VPALIGNR XMM0,XMM7,XMM6,0x4 /* XTMP0 = W[-7] */ | |
50 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
51 | XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
52 | MOVL f, R15 \ /* y2 = f */ | |
53 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
54 | XORL a, R14 \ /* y1 = a ^ (a >> (22-13)) */ | |
55 | XORL g, R15 \ /* y2 = f^g */ | |
56 | LONG $0xc4fef9c5 \ // VPADDD XMM0,XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ | |
57 | XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
58 | ANDL e, R15 \ /* y2 = (f^g)&e */ | |
59 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
60 | \ /* */ | |
61 | \ /* compute s0 */ | |
62 | \ /* */ | |
63 | LONG $0x0f51e3c4; WORD $0x04cc \ // VPALIGNR XMM1,XMM5,XMM4,0x4 /* XTMP1 = W[-15] */ | |
64 | XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
65 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */ | |
66 | XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
67 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
68 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
69 | ADDL _xfer+48(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
70 | MOVL a, R13 \ /* y0 = a */ | |
71 | ADDL R15, h \ /* h = h + S1 + CH + k + w */ | |
72 | \ /* ROTATE_ARGS */ | |
73 | MOVL a, R15 \ /* y2 = a */ | |
74 | LONG $0xd172e9c5; BYTE $0x07 \ // VPSRLD XMM2,XMM1,0x7 /* */ | |
75 | ORL c, R13 \ /* y0 = a|c */ | |
76 | ADDL h, d \ /* d = d + h + S1 + CH + k + w */ | |
77 | ANDL c, R15 \ /* y2 = a&c */ | |
78 | LONG $0xf172e1c5; BYTE $0x19 \ // VPSLLD XMM3,XMM1,0x19 /* */ | |
79 | ANDL b, R13 \ /* y0 = (a|c)&b */ | |
80 | ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ | |
81 | LONG $0xdaebe1c5 \ // VPOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ | |
82 | ORL R15, R13 \ /* y0 = MAJ = ((a|c)&b)|(a&c) */ | |
83 | ADDL R13, h \ /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
84 | \ /* ROTATE_ARGS */ | |
85 | MOVL d, R13 \ /* y0 = e */ | |
86 | MOVL h, R14 \ /* y1 = a */ | |
87 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
88 | XORL d, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
89 | MOVL e, R15 \ /* y2 = f */ | |
90 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
91 | LONG $0xd172e9c5; BYTE $0x12 \ // VPSRLD XMM2,XMM1,0x12 /* */ | |
92 | XORL h, R14 \ /* y1 = a ^ (a >> (22-13)) */ | |
93 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
94 | XORL f, R15 \ /* y2 = f^g */ | |
95 | LONG $0xd172b9c5; BYTE $0x03 \ // VPSRLD XMM8,XMM1,0x3 /* XTMP4 = W[-15] >> 3 */ | |
96 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
97 | XORL d, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
98 | ANDL d, R15 \ /* y2 = (f^g)&e */ | |
99 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */ | |
100 | LONG $0xf172f1c5; BYTE $0x0e \ // VPSLLD XMM1,XMM1,0xe /* */ | |
101 | XORL h, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
102 | XORL f, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
103 | LONG $0xd9efe1c5 \ // VPXOR XMM3,XMM3,XMM1 /* */ | |
104 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
105 | ADDL _xfer+52(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
106 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
107 | LONG $0xdaefe1c5 \ // VPXOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */ | |
108 | MOVL h, R13 \ /* y0 = a */ | |
109 | ADDL R15, g \ /* h = h + S1 + CH + k + w */ | |
110 | MOVL h, R15 \ /* y2 = a */ | |
111 | LONG $0xef61c1c4; BYTE $0xc8 \ // VPXOR XMM1,XMM3,XMM8 /* XTMP1 = s0 */ | |
112 | ORL b, R13 \ /* y0 = a|c */ | |
113 | ADDL g, c \ /* d = d + h + S1 + CH + k + w */ | |
114 | ANDL b, R15 \ /* y2 = a&c */ | |
115 | \ /* */ | |
116 | \ /* compute low s1 */ | |
117 | \ /* */ | |
118 | LONG $0xd770f9c5; BYTE $0xfa \ // VPSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ | |
119 | ANDL a, R13 \ /* y0 = (a|c)&b */ | |
120 | ADDL R14, g \ /* h = h + S1 + CH + k + w + S0 */ | |
121 | LONG $0xc1fef9c5 \ // VPADDD XMM0,XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ | |
122 | ORL R15, R13 \ /* y0 = MAJ = ((a|c)&b)|(a&c) */ | |
123 | ADDL R13, g \ /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
124 | \ /* ROTATE_ARGS */ | |
125 | MOVL c, R13 \ /* y0 = e */ | |
126 | MOVL g, R14 \ /* y1 = a */ | |
127 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
128 | XORL c, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
129 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
130 | MOVL d, R15 \ /* y2 = f */ | |
131 | XORL g, R14 \ /* y1 = a ^ (a >> (22-13)) */ | |
132 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
133 | LONG $0xd272b9c5; BYTE $0x0a \ // VPSRLD XMM8,XMM2,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ | |
134 | XORL e, R15 \ /* y2 = f^g */ | |
135 | LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ | |
136 | XORL c, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
137 | ANDL c, R15 \ /* y2 = (f^g)&e */ | |
138 | LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ | |
139 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
140 | XORL g, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
141 | XORL e, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
142 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */ | |
143 | LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ | |
144 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
145 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
146 | ADDL _xfer+56(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
147 | LONG $0xc2ef39c5 \ // VPXOR XMM8,XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ | |
148 | MOVL g, R13 \ /* y0 = a */ | |
149 | ADDL R15, f \ /* h = h + S1 + CH + k + w */ | |
150 | MOVL g, R15 \ /* y2 = a */ | |
151 | LONG $0x003942c4; BYTE $0xc2 \ // VPSHUFB XMM8,XMM8,XMM10 /* XTMP4 = s1 {00BA} */ | |
152 | ORL a, R13 \ /* y0 = a|c */ | |
153 | ADDL f, b \ /* d = d + h + S1 + CH + k + w */ | |
154 | ANDL a, R15 \ /* y2 = a&c */ | |
155 | LONG $0xfe79c1c4; BYTE $0xc0 \ // VPADDD XMM0,XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ | |
156 | ANDL h, R13 \ /* y0 = (a|c)&b */ | |
157 | ADDL R14, f \ /* h = h + S1 + CH + k + w + S0 */ | |
158 | \ /* */ | |
159 | \ /* compute high s1 */ | |
160 | \ /* */ | |
161 | LONG $0xd070f9c5; BYTE $0x50 \ // VPSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ | |
162 | ORL R15, R13 \ /* y0 = MAJ = ((a|c)&b)|(a&c) */ | |
163 | ADDL R13, f \ /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
164 | \ /* ROTATE_ARGS */ | |
165 | MOVL b, R13 \ /* y0 = e */ | |
166 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
167 | MOVL f, R14 \ /* y1 = a */ | |
168 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
169 | XORL b, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
170 | MOVL c, R15 \ /* y2 = f */ | |
171 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
172 | LONG $0xd272a1c5; BYTE $0x0a \ // VPSRLD XMM11,XMM2,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ | |
173 | XORL f, R14 \ /* y1 = a ^ (a >> (22-13)) */ | |
174 | XORL d, R15 \ /* y2 = f^g */ | |
175 | LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ | |
176 | XORL b, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
177 | ANDL b, R15 \ /* y2 = (f^g)&e */ | |
178 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
179 | LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ | |
180 | XORL f, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
181 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */ | |
182 | XORL d, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
183 | LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ | |
184 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
185 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
186 | ADDL _xfer+60(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
187 | LONG $0xdaef21c5 \ // VPXOR XMM11,XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ | |
188 | MOVL f, R13 \ /* y0 = a */ | |
189 | ADDL R15, e \ /* h = h + S1 + CH + k + w */ | |
190 | MOVL f, R15 \ /* y2 = a */ | |
191 | LONG $0x002142c4; BYTE $0xdc \ // VPSHUFB XMM11,XMM11,XMM12 /* XTMP5 = s1 {DC00} */ | |
192 | ORL h, R13 \ /* y0 = a|c */ | |
193 | ADDL e, a \ /* d = d + h + S1 + CH + k + w */ | |
194 | ANDL h, R15 \ /* y2 = a&c */ | |
195 | LONG $0xe0fea1c5 \ // VPADDD XMM4,XMM11,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ | |
196 | ANDL g, R13 \ /* y0 = (a|c)&b */ | |
197 | ADDL R14, e \ /* h = h + S1 + CH + k + w + S0 */ | |
198 | ORL R15, R13 \ /* y0 = MAJ = ((a|c)&b)|(a&c) */ | |
199 | ADDL R13, e \ /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
200 | \ /* ROTATE_ARGS */ | |
201 | ROTATE_XS | |
202 | ||
46 | MOVL e, R13 \ // y0 = e | |
47 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
48 | MOVL a, R14 \ // y1 = a | |
49 | LONG $0x0f41e3c4; WORD $0x04c6 \ // VPALIGNR XMM0,XMM7,XMM6,0x4 /* XTMP0 = W[-7] */ | |
50 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
51 | XORL e, R13 \ // y0 = e ^ (e >> (25-11)) | |
52 | MOVL f, R15 \ // y2 = f | |
53 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
54 | XORL a, R14 \ // y1 = a ^ (a >> (22-13)) | |
55 | XORL g, R15 \ // y2 = f^g | |
56 | LONG $0xc4fef9c5 \ // VPADDD XMM0,XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ | |
57 | XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
58 | ANDL e, R15 \ // y2 = (f^g)&e | |
59 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
60 | \ | |
61 | \ // compute s0 | |
62 | \ | |
63 | LONG $0x0f51e3c4; WORD $0x04cc \ // VPALIGNR XMM1,XMM5,XMM4,0x4 /* XTMP1 = W[-15] */ | |
64 | XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
65 | ROLL $26, R13 \ // y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) | |
66 | XORL g, R15 \ // y2 = CH = ((f^g)&e)^g | |
67 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
68 | ADDL R13, R15 \ // y2 = S1 + CH | |
69 | ADDL _xfer+48(FP), R15 \ // y2 = k + w + S1 + CH | |
70 | MOVL a, R13 \ // y0 = a | |
71 | ADDL R15, h \ // h = h + S1 + CH + k + w | |
72 | \ // ROTATE_ARGS | |
73 | MOVL a, R15 \ // y2 = a | |
74 | LONG $0xd172e9c5; BYTE $0x07 \ // VPSRLD XMM2,XMM1,0x7 /* */ | |
75 | ORL c, R13 \ // y0 = a|c | |
76 | ADDL h, d \ // d = d + h + S1 + CH + k + w | |
77 | ANDL c, R15 \ // y2 = a&c | |
78 | LONG $0xf172e1c5; BYTE $0x19 \ // VPSLLD XMM3,XMM1,0x19 /* */ | |
79 | ANDL b, R13 \ // y0 = (a|c)&b | |
80 | ADDL R14, h \ // h = h + S1 + CH + k + w + S0 | |
81 | LONG $0xdaebe1c5 \ // VPOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ | |
82 | ORL R15, R13 \ // y0 = MAJ = ((a|c)&b)|(a&c) | |
83 | ADDL R13, h \ // h = h + S1 + CH + k + w + S0 + MAJ | |
84 | \ // ROTATE_ARGS | |
85 | MOVL d, R13 \ // y0 = e | |
86 | MOVL h, R14 \ // y1 = a | |
87 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
88 | XORL d, R13 \ // y0 = e ^ (e >> (25-11)) | |
89 | MOVL e, R15 \ // y2 = f | |
90 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
91 | LONG $0xd172e9c5; BYTE $0x12 \ // VPSRLD XMM2,XMM1,0x12 /* */ | |
92 | XORL h, R14 \ // y1 = a ^ (a >> (22-13)) | |
93 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
94 | XORL f, R15 \ // y2 = f^g | |
95 | LONG $0xd172b9c5; BYTE $0x03 \ // VPSRLD XMM8,XMM1,0x3 /* XTMP4 = W[-15] >> 3 */ | |
96 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
97 | XORL d, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
98 | ANDL d, R15 \ // y2 = (f^g)&e | |
99 | ROLL $26, R13 \ // y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) | |
100 | LONG $0xf172f1c5; BYTE $0x0e \ // VPSLLD XMM1,XMM1,0xe /* */ | |
101 | XORL h, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
102 | XORL f, R15 \ // y2 = CH = ((f^g)&e)^g | |
103 | LONG $0xd9efe1c5 \ // VPXOR XMM3,XMM3,XMM1 /* */ | |
104 | ADDL R13, R15 \ // y2 = S1 + CH | |
105 | ADDL _xfer+52(FP), R15 \ // y2 = k + w + S1 + CH | |
106 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
107 | LONG $0xdaefe1c5 \ // VPXOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */ | |
108 | MOVL h, R13 \ // y0 = a | |
109 | ADDL R15, g \ // h = h + S1 + CH + k + w | |
110 | MOVL h, R15 \ // y2 = a | |
111 | LONG $0xef61c1c4; BYTE $0xc8 \ // VPXOR XMM1,XMM3,XMM8 /* XTMP1 = s0 */ | |
112 | ORL b, R13 \ // y0 = a|c | |
113 | ADDL g, c \ // d = d + h + S1 + CH + k + w | |
114 | ANDL b, R15 \ // y2 = a&c | |
115 | \ | |
116 | \ // compute low s1 | |
117 | \ | |
118 | LONG $0xd770f9c5; BYTE $0xfa \ // VPSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ | |
119 | ANDL a, R13 \ // y0 = (a|c)&b | |
120 | ADDL R14, g \ // h = h + S1 + CH + k + w + S0 | |
121 | LONG $0xc1fef9c5 \ // VPADDD XMM0,XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ | |
122 | ORL R15, R13 \ // y0 = MAJ = ((a|c)&b)|(a&c) | |
123 | ADDL R13, g \ // h = h + S1 + CH + k + w + S0 + MAJ | |
124 | \ // ROTATE_ARGS | |
125 | MOVL c, R13 \ // y0 = e | |
126 | MOVL g, R14 \ // y1 = a | |
127 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
128 | XORL c, R13 \ // y0 = e ^ (e >> (25-11)) | |
129 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
130 | MOVL d, R15 \ // y2 = f | |
131 | XORL g, R14 \ // y1 = a ^ (a >> (22-13)) | |
132 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
133 | LONG $0xd272b9c5; BYTE $0x0a \ // VPSRLD XMM8,XMM2,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ | |
134 | XORL e, R15 \ // y2 = f^g | |
135 | LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ | |
136 | XORL c, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
137 | ANDL c, R15 \ // y2 = (f^g)&e | |
138 | LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ | |
139 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
140 | XORL g, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
141 | XORL e, R15 \ // y2 = CH = ((f^g)&e)^g | |
142 | ROLL $26, R13 \ // y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) | |
143 | LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ | |
144 | ADDL R13, R15 \ // y2 = S1 + CH | |
145 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
146 | ADDL _xfer+56(FP), R15 \ // y2 = k + w + S1 + CH | |
147 | LONG $0xc2ef39c5 \ // VPXOR XMM8,XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ | |
148 | MOVL g, R13 \ // y0 = a | |
149 | ADDL R15, f \ // h = h + S1 + CH + k + w | |
150 | MOVL g, R15 \ // y2 = a | |
151 | LONG $0x003942c4; BYTE $0xc2 \ // VPSHUFB XMM8,XMM8,XMM10 /* XTMP4 = s1 {00BA} */ | |
152 | ORL a, R13 \ // y0 = a|c | |
153 | ADDL f, b \ // d = d + h + S1 + CH + k + w | |
154 | ANDL a, R15 \ // y2 = a&c | |
155 | LONG $0xfe79c1c4; BYTE $0xc0 \ // VPADDD XMM0,XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ | |
156 | ANDL h, R13 \ // y0 = (a|c)&b | |
157 | ADDL R14, f \ // h = h + S1 + CH + k + w + S0 | |
158 | \ | |
159 | \ // compute high s1 | |
160 | \ | |
161 | LONG $0xd070f9c5; BYTE $0x50 \ // VPSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ | |
162 | ORL R15, R13 \ // y0 = MAJ = ((a|c)&b)|(a&c) | |
163 | ADDL R13, f \ // h = h + S1 + CH + k + w + S0 + MAJ | |
164 | \ // ROTATE_ARGS | |
165 | MOVL b, R13 \ // y0 = e | |
166 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
167 | MOVL f, R14 \ // y1 = a | |
168 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
169 | XORL b, R13 \ // y0 = e ^ (e >> (25-11)) | |
170 | MOVL c, R15 \ // y2 = f | |
171 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
172 | LONG $0xd272a1c5; BYTE $0x0a \ // VPSRLD XMM11,XMM2,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ | |
173 | XORL f, R14 \ // y1 = a ^ (a >> (22-13)) | |
174 | XORL d, R15 \ // y2 = f^g | |
175 | LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ | |
176 | XORL b, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
177 | ANDL b, R15 \ // y2 = (f^g)&e | |
178 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
179 | LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ | |
180 | XORL f, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
181 | ROLL $26, R13 \ // y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) | |
182 | XORL d, R15 \ // y2 = CH = ((f^g)&e)^g | |
183 | LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ | |
184 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
185 | ADDL R13, R15 \ // y2 = S1 + CH | |
186 | ADDL _xfer+60(FP), R15 \ // y2 = k + w + S1 + CH | |
187 | LONG $0xdaef21c5 \ // VPXOR XMM11,XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ | |
188 | MOVL f, R13 \ // y0 = a | |
189 | ADDL R15, e \ // h = h + S1 + CH + k + w | |
190 | MOVL f, R15 \ // y2 = a | |
191 | LONG $0x002142c4; BYTE $0xdc \ // VPSHUFB XMM11,XMM11,XMM12 /* XTMP5 = s1 {DC00} */ | |
192 | ORL h, R13 \ // y0 = a|c | |
193 | ADDL e, a \ // d = d + h + S1 + CH + k + w | |
194 | ANDL h, R15 \ // y2 = a&c | |
195 | LONG $0xe0fea1c5 \ // VPADDD XMM4,XMM11,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ | |
196 | ANDL g, R13 \ // y0 = (a|c)&b | |
197 | ADDL R14, e \ // h = h + S1 + CH + k + w + S0 | |
198 | ORL R15, R13 \ // y0 = MAJ = ((a|c)&b)|(a&c) | |
199 | ADDL R13, e \ // h = h + S1 + CH + k + w + S0 + MAJ | |
200 | \ // ROTATE_ARGS | |
201 | ROTATE_XS | |
203 | 202 | |
204 | 203 | #define DO_ROUND(a, b, c, d, e, f, g, h, offset) \ |
205 | MOVL e, R13 \ /* y0 = e */ | |
206 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
207 | MOVL a, R14 \ /* y1 = a */ | |
208 | XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
209 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
210 | MOVL f, R15 \ /* y2 = f */ | |
211 | XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ | |
212 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
213 | XORL g, R15 \ /* y2 = f^g */ | |
214 | XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
215 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
216 | ANDL e, R15 \ /* y2 = (f^g)&e */ | |
217 | XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
218 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ | |
219 | XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
220 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
221 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
222 | ADDL _xfer+offset(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
223 | MOVL a, R13 \ /* y0 = a */ | |
224 | ADDL R15, h \ /* h = h + S1 + CH + k + w */ | |
225 | MOVL a, R15 \ /* y2 = a */ | |
226 | ORL c, R13 \ /* y0 = a|c */ | |
227 | ADDL h, d \ /* d = d + h + S1 + CH + k + w */ | |
228 | ANDL c, R15 \ /* y2 = a&c */ | |
229 | ANDL b, R13 \ /* y0 = (a|c)&b */ | |
230 | ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ | |
231 | ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ | |
232 | ADDL R13, h /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
233 | ||
204 | MOVL e, R13 \ // y0 = e | |
205 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
206 | MOVL a, R14 \ // y1 = a | |
207 | XORL e, R13 \ // y0 = e ^ (e >> (25-11)) | |
208 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
209 | MOVL f, R15 \ // y2 = f | |
210 | XORL a, R14 \ // y1 = a ^ (a >> (22-13) | |
211 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
212 | XORL g, R15 \ // y2 = f^g | |
213 | XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
214 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
215 | ANDL e, R15 \ // y2 = (f^g)&e | |
216 | XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
217 | ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | |
218 | XORL g, R15 \ // y2 = CH = ((f^g)&e)^g | |
219 | ADDL R13, R15 \ // y2 = S1 + CH | |
220 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
221 | ADDL _xfer+offset(FP), R15 \ // y2 = k + w + S1 + CH | |
222 | MOVL a, R13 \ // y0 = a | |
223 | ADDL R15, h \ // h = h + S1 + CH + k + w | |
224 | MOVL a, R15 \ // y2 = a | |
225 | ORL c, R13 \ // y0 = a|c | |
226 | ADDL h, d \ // d = d + h + S1 + CH + k + w | |
227 | ANDL c, R15 \ // y2 = a&c | |
228 | ANDL b, R13 \ // y0 = (a|c)&b | |
229 | ADDL R14, h \ // h = h + S1 + CH + k + w + S0 | |
230 | ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) | |
231 | ADDL R13, h // h = h + S1 + CH + k + w + S0 + MAJ | |
234 | 232 | |
235 | 233 | // func blockAvx(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64) |
236 | 234 | TEXT ·blockAvx(SB), 7, $0 |
237 | 235 | |
238 | MOVQ h+0(FP), SI // SI: &h | |
239 | MOVQ message+24(FP), R8 // &message | |
240 | MOVQ lenmessage+32(FP), R9 // length of message | |
241 | CMPQ R9, $0 | |
242 | JEQ done_hash | |
243 | ADDQ R8, R9 | |
244 | MOVQ R9, _inp_end+64(FP) // store end of message | |
245 | ||
246 | // Register definition | |
247 | // a --> eax | |
248 | // b --> ebx | |
249 | // c --> ecx | |
250 | // d --> r8d | |
251 | // e --> edx | |
252 | // f --> r9d | |
253 | // g --> r10d | |
254 | // h --> r11d | |
255 | // | |
256 | // y0 --> r13d | |
257 | // y1 --> r14d | |
258 | // y2 --> r15d | |
259 | ||
260 | MOVL (0*4)(SI), AX // a = H0 | |
261 | MOVL (1*4)(SI), BX // b = H1 | |
262 | MOVL (2*4)(SI), CX // c = H2 | |
263 | MOVL (3*4)(SI), R8 // d = H3 | |
264 | MOVL (4*4)(SI), DX // e = H4 | |
265 | MOVL (5*4)(SI), R9 // f = H5 | |
266 | MOVL (6*4)(SI), R10 // g = H6 | |
267 | MOVL (7*4)(SI), R11 // h = H7 | |
236 | MOVQ h+0(FP), SI // SI: &h | |
237 | MOVQ message+24(FP), R8 // &message | |
238 | MOVQ lenmessage+32(FP), R9 // length of message | |
239 | CMPQ R9, $0 | |
240 | JEQ done_hash | |
241 | ADDQ R8, R9 | |
242 | MOVQ R9, _inp_end+64(FP) // store end of message | |
243 | ||
244 | // Register definition | |
245 | // a --> eax | |
246 | // b --> ebx | |
247 | // c --> ecx | |
248 | // d --> r8d | |
249 | // e --> edx | |
250 | // f --> r9d | |
251 | // g --> r10d | |
252 | // h --> r11d | |
253 | // | |
254 | // y0 --> r13d | |
255 | // y1 --> r14d | |
256 | // y2 --> r15d | |
257 | ||
258 | MOVL (0*4)(SI), AX // a = H0 | |
259 | MOVL (1*4)(SI), BX // b = H1 | |
260 | MOVL (2*4)(SI), CX // c = H2 | |
261 | MOVL (3*4)(SI), R8 // d = H3 | |
262 | MOVL (4*4)(SI), DX // e = H4 | |
263 | MOVL (5*4)(SI), R9 // f = H5 | |
264 | MOVL (6*4)(SI), R10 // g = H6 | |
265 | MOVL (7*4)(SI), R11 // h = H7 | |
268 | 266 | |
269 | 267 | MOVOU bflipMask<>(SB), X13 |
270 | MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA | |
271 | MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 | |
272 | ||
273 | MOVQ message+24(FP), SI // SI: &message | |
268 | MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA | |
269 | MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 | |
270 | ||
271 | MOVQ message+24(FP), SI // SI: &message | |
274 | 272 | |
275 | 273 | loop0: |
276 | 274 | LEAQ constants<>(SB), BP |
277 | 275 | |
278 | 276 | // byte swap first 16 dwords |
279 | MOVOU 0*16(SI), X4 | |
280 | LONG $0x0059c2c4; BYTE $0xe5 // VPSHUFB XMM4, XMM4, XMM13 | |
281 | MOVOU 1*16(SI), X5 | |
282 | LONG $0x0051c2c4; BYTE $0xed // VPSHUFB XMM5, XMM5, XMM13 | |
283 | MOVOU 2*16(SI), X6 | |
284 | LONG $0x0049c2c4; BYTE $0xf5 // VPSHUFB XMM6, XMM6, XMM13 | |
285 | MOVOU 3*16(SI), X7 | |
286 | LONG $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13 | |
287 | ||
288 | MOVQ SI, _inp+72(FP) | |
289 | MOVD $0x3, DI | |
277 | MOVOU 0*16(SI), X4 | |
278 | LONG $0x0059c2c4; BYTE $0xe5 // VPSHUFB XMM4, XMM4, XMM13 | |
279 | MOVOU 1*16(SI), X5 | |
280 | LONG $0x0051c2c4; BYTE $0xed // VPSHUFB XMM5, XMM5, XMM13 | |
281 | MOVOU 2*16(SI), X6 | |
282 | LONG $0x0049c2c4; BYTE $0xf5 // VPSHUFB XMM6, XMM6, XMM13 | |
283 | MOVOU 3*16(SI), X7 | |
284 | LONG $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13 | |
285 | ||
286 | MOVQ SI, _inp+72(FP) | |
287 | MOVD $0x3, DI | |
290 | 288 | |
291 | 289 | // schedule 48 input dwords, by doing 3 rounds of 16 each |
292 | 290 | loop1: |
293 | LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ | |
294 | MOVOU X9, _xfer+48(FP) | |
295 | FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) | |
296 | ||
297 | LONG $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP] /* Add 2nd constant to message */ | |
298 | MOVOU X9, _xfer+48(FP) | |
299 | FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) | |
300 | ||
301 | LONG $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP] /* Add 3rd constant to message */ | |
302 | MOVOU X9, _xfer+48(FP) | |
303 | FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) | |
304 | ||
305 | LONG $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP] /* Add 4th constant to message */ | |
306 | MOVOU X9, _xfer+48(FP) | |
307 | ADDQ $64, BP | |
308 | FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) | |
309 | ||
310 | SUBQ $1, DI | |
311 | JNE loop1 | |
312 | ||
313 | MOVD $0x2, DI | |
291 | LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ | |
292 | MOVOU X9, _xfer+48(FP) | |
293 | FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) | |
294 | ||
295 | LONG $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP] /* Add 2nd constant to message */ | |
296 | MOVOU X9, _xfer+48(FP) | |
297 | FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) | |
298 | ||
299 | LONG $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP] /* Add 3rd constant to message */ | |
300 | MOVOU X9, _xfer+48(FP) | |
301 | FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) | |
302 | ||
303 | LONG $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP] /* Add 4th constant to message */ | |
304 | MOVOU X9, _xfer+48(FP) | |
305 | ADDQ $64, BP | |
306 | FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) | |
307 | ||
308 | SUBQ $1, DI | |
309 | JNE loop1 | |
310 | ||
311 | MOVD $0x2, DI | |
312 | ||
314 | 313 | loop2: |
315 | LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ | |
316 | MOVOU X9, _xfer+48(FP) | |
317 | DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) | |
318 | DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) | |
319 | DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) | |
320 | DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) | |
321 | ||
322 | LONG $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP] /* Add 2nd constant to message */ | |
323 | MOVOU X9, _xfer+48(FP) | |
324 | ADDQ $32, BP | |
325 | DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) | |
326 | DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) | |
327 | DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) | |
328 | DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) | |
329 | ||
330 | MOVOU X6, X4 | |
331 | MOVOU X7, X5 | |
332 | ||
333 | SUBQ $1, DI | |
334 | JNE loop2 | |
335 | ||
336 | MOVQ h+0(FP), SI // SI: &h | |
337 | ADDL (0*4)(SI), AX // H0 = a + H0 | |
338 | MOVL AX, (0*4)(SI) | |
339 | ADDL (1*4)(SI), BX // H1 = b + H1 | |
340 | MOVL BX, (1*4)(SI) | |
341 | ADDL (2*4)(SI), CX // H2 = c + H2 | |
342 | MOVL CX, (2*4)(SI) | |
343 | ADDL (3*4)(SI), R8 // H3 = d + H3 | |
344 | MOVL R8, (3*4)(SI) | |
345 | ADDL (4*4)(SI), DX // H4 = e + H4 | |
346 | MOVL DX, (4*4)(SI) | |
347 | ADDL (5*4)(SI), R9 // H5 = f + H5 | |
348 | MOVL R9, (5*4)(SI) | |
349 | ADDL (6*4)(SI), R10 // H6 = g + H6 | |
350 | MOVL R10, (6*4)(SI) | |
351 | ADDL (7*4)(SI), R11 // H7 = h + H7 | |
352 | MOVL R11, (7*4)(SI) | |
353 | ||
354 | MOVQ _inp+72(FP), SI | |
314 | LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ | |
315 | MOVOU X9, _xfer+48(FP) | |
316 | DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) | |
317 | DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) | |
318 | DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) | |
319 | DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) | |
320 | ||
321 | LONG $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP] /* Add 2nd constant to message */ | |
322 | MOVOU X9, _xfer+48(FP) | |
323 | ADDQ $32, BP | |
324 | DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) | |
325 | DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) | |
326 | DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) | |
327 | DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) | |
328 | ||
329 | MOVOU X6, X4 | |
330 | MOVOU X7, X5 | |
331 | ||
332 | SUBQ $1, DI | |
333 | JNE loop2 | |
334 | ||
335 | MOVQ h+0(FP), SI // SI: &h | |
336 | ADDL (0*4)(SI), AX // H0 = a + H0 | |
337 | MOVL AX, (0*4)(SI) | |
338 | ADDL (1*4)(SI), BX // H1 = b + H1 | |
339 | MOVL BX, (1*4)(SI) | |
340 | ADDL (2*4)(SI), CX // H2 = c + H2 | |
341 | MOVL CX, (2*4)(SI) | |
342 | ADDL (3*4)(SI), R8 // H3 = d + H3 | |
343 | MOVL R8, (3*4)(SI) | |
344 | ADDL (4*4)(SI), DX // H4 = e + H4 | |
345 | MOVL DX, (4*4)(SI) | |
346 | ADDL (5*4)(SI), R9 // H5 = f + H5 | |
347 | MOVL R9, (5*4)(SI) | |
348 | ADDL (6*4)(SI), R10 // H6 = g + H6 | |
349 | MOVL R10, (6*4)(SI) | |
350 | ADDL (7*4)(SI), R11 // H7 = h + H7 | |
351 | MOVL R11, (7*4)(SI) | |
352 | ||
353 | MOVQ _inp+72(FP), SI | |
355 | 354 | ADDQ $64, SI |
356 | 355 | CMPQ _inp_end+64(FP), SI |
357 | JNE loop0 | |
356 | JNE loop0 | |
358 | 357 | |
359 | 358 | done_hash: |
360 | RET | |
359 | RET | |
361 | 360 | |
362 | 361 | // Constants table |
363 | 362 | DATA constants<>+0x0(SB)/8, $0x71374491428a2f98 |
0 | //+build !noasm | |
1 | ||
2 | package sha256 | |
3 | ||
// blockSha is implemented in assembly (TEXT ·blockSha). It consumes message
// in whole 64-byte blocks and updates the eight-word hash state h in place.
4 | //go:noescape | |
5 | func blockSha(h *[8]uint32, message []uint8)
0 | //+build !noasm !appengine | |
1 | ||
2 | // SHA intrinsic version of SHA256 | |
3 | ||
4 | // Minio Cloud Storage, (C) 2018 Minio, Inc. | |
5 | // | |
6 | // Licensed under the Apache License, Version 2.0 (the "License"); | |
7 | // you may not use this file except in compliance with the License. | |
8 | // You may obtain a copy of the License at | |
9 | // | |
10 | // http://www.apache.org/licenses/LICENSE-2.0 | |
11 | // | |
12 | // Unless required by applicable law or agreed to in writing, software | |
13 | // distributed under the License is distributed on an "AS IS" BASIS, | |
14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 | // See the License for the specific language governing permissions and | |
16 | // limitations under the License. | |
17 | // | |
18 | ||
19 | #include "textflag.h" | |
20 | ||
// K<> holds 64 32-bit constants (256 bytes in total). blockSha addresses the
// table through BX and loads one 16-byte group (four constants) for every
// four rounds.
// NOTE(review): the values appear to be the standard SHA-256 round constants.
21 | DATA K<>+0x00(SB)/4, $0x428a2f98 | |
22 | DATA K<>+0x04(SB)/4, $0x71374491 | |
23 | DATA K<>+0x08(SB)/4, $0xb5c0fbcf | |
24 | DATA K<>+0x0c(SB)/4, $0xe9b5dba5 | |
25 | DATA K<>+0x10(SB)/4, $0x3956c25b | |
26 | DATA K<>+0x14(SB)/4, $0x59f111f1 | |
27 | DATA K<>+0x18(SB)/4, $0x923f82a4 | |
28 | DATA K<>+0x1c(SB)/4, $0xab1c5ed5 | |
29 | DATA K<>+0x20(SB)/4, $0xd807aa98 | |
30 | DATA K<>+0x24(SB)/4, $0x12835b01 | |
31 | DATA K<>+0x28(SB)/4, $0x243185be | |
32 | DATA K<>+0x2c(SB)/4, $0x550c7dc3 | |
33 | DATA K<>+0x30(SB)/4, $0x72be5d74 | |
34 | DATA K<>+0x34(SB)/4, $0x80deb1fe | |
35 | DATA K<>+0x38(SB)/4, $0x9bdc06a7 | |
36 | DATA K<>+0x3c(SB)/4, $0xc19bf174 | |
37 | DATA K<>+0x40(SB)/4, $0xe49b69c1 | |
38 | DATA K<>+0x44(SB)/4, $0xefbe4786 | |
39 | DATA K<>+0x48(SB)/4, $0x0fc19dc6 | |
40 | DATA K<>+0x4c(SB)/4, $0x240ca1cc | |
41 | DATA K<>+0x50(SB)/4, $0x2de92c6f | |
42 | DATA K<>+0x54(SB)/4, $0x4a7484aa | |
43 | DATA K<>+0x58(SB)/4, $0x5cb0a9dc | |
44 | DATA K<>+0x5c(SB)/4, $0x76f988da | |
45 | DATA K<>+0x60(SB)/4, $0x983e5152 | |
46 | DATA K<>+0x64(SB)/4, $0xa831c66d | |
47 | DATA K<>+0x68(SB)/4, $0xb00327c8 | |
48 | DATA K<>+0x6c(SB)/4, $0xbf597fc7 | |
49 | DATA K<>+0x70(SB)/4, $0xc6e00bf3 | |
50 | DATA K<>+0x74(SB)/4, $0xd5a79147 | |
51 | DATA K<>+0x78(SB)/4, $0x06ca6351 | |
52 | DATA K<>+0x7c(SB)/4, $0x14292967 | |
53 | DATA K<>+0x80(SB)/4, $0x27b70a85 | |
54 | DATA K<>+0x84(SB)/4, $0x2e1b2138 | |
55 | DATA K<>+0x88(SB)/4, $0x4d2c6dfc | |
56 | DATA K<>+0x8c(SB)/4, $0x53380d13 | |
57 | DATA K<>+0x90(SB)/4, $0x650a7354 | |
58 | DATA K<>+0x94(SB)/4, $0x766a0abb | |
59 | DATA K<>+0x98(SB)/4, $0x81c2c92e | |
60 | DATA K<>+0x9c(SB)/4, $0x92722c85 | |
61 | DATA K<>+0xa0(SB)/4, $0xa2bfe8a1 | |
62 | DATA K<>+0xa4(SB)/4, $0xa81a664b | |
63 | DATA K<>+0xa8(SB)/4, $0xc24b8b70 | |
64 | DATA K<>+0xac(SB)/4, $0xc76c51a3 | |
65 | DATA K<>+0xb0(SB)/4, $0xd192e819 | |
66 | DATA K<>+0xb4(SB)/4, $0xd6990624 | |
67 | DATA K<>+0xb8(SB)/4, $0xf40e3585 | |
68 | DATA K<>+0xbc(SB)/4, $0x106aa070 | |
69 | DATA K<>+0xc0(SB)/4, $0x19a4c116 | |
70 | DATA K<>+0xc4(SB)/4, $0x1e376c08 | |
71 | DATA K<>+0xc8(SB)/4, $0x2748774c | |
72 | DATA K<>+0xcc(SB)/4, $0x34b0bcb5 | |
73 | DATA K<>+0xd0(SB)/4, $0x391c0cb3 | |
74 | DATA K<>+0xd4(SB)/4, $0x4ed8aa4a | |
75 | DATA K<>+0xd8(SB)/4, $0x5b9cca4f | |
76 | DATA K<>+0xdc(SB)/4, $0x682e6ff3 | |
77 | DATA K<>+0xe0(SB)/4, $0x748f82ee | |
78 | DATA K<>+0xe4(SB)/4, $0x78a5636f | |
79 | DATA K<>+0xe8(SB)/4, $0x84c87814 | |
80 | DATA K<>+0xec(SB)/4, $0x8cc70208 | |
81 | DATA K<>+0xf0(SB)/4, $0x90befffa | |
82 | DATA K<>+0xf4(SB)/4, $0xa4506ceb | |
83 | DATA K<>+0xf8(SB)/4, $0xbef9a3f7 | |
84 | DATA K<>+0xfc(SB)/4, $0xc67178f2 | |
85 | GLOBL K<>(SB), RODATA|NOPTR, $256 | |
86 | ||
// SHUF_MASK<> is the 16-byte PSHUFB control that blockSha applies to each
// 16-byte chunk of input: byte i of the result is taken from byte
// (3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12)[i], i.e. the byte order inside
// every 32-bit word is reversed.
87 | DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203 | |
88 | DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b | |
89 | GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16 | |
90 | ||
91 | // Register Usage | |
92 | // BX base address of constant table (constant) | |
93 | // DX hash_state (constant) | |
94 | // SI hash_data.data | |
95 | // DI hash_data.data + hash_data.length - 64 (constant) | |
96 | // X0 scratch | |
97 | // X1 scratch | |
98 | // X2 working hash state // ABEF | |
99 | // X3 working hash state // CDGH | |
100 | // X4 first 16 bytes of block | |
101 | // X5 second 16 bytes of block | |
102 | // X6 third 16 bytes of block | |
103 | // X7 fourth 16 bytes of block | |
104 | // X12 saved hash state // ABEF | |
105 | // X13 saved hash state // CDGH | |
106 | // X15 data shuffle mask (constant) | |
107 | ||
// func blockSha(h *[8]uint32, message []uint8)
//
// Runs the hash rounds over message one 64-byte block at a time using the SHA
// extension instructions (emitted below as raw LONG/WORD opcode bytes) and
// stores the resulting state back into h. The loop only executes while a full
// 64 bytes remain at SI, so trailing bytes are not consumed.
//
// NOTE(review): DI is computed as base+len-64 and compared unsigned against
// SI. For a nil message (base 0, len 0) this looks like it would wrap around
// and enter the loop -- confirm that callers never pass such a slice.
108 | TEXT ·blockSha(SB), NOSPLIT, $0-32 | |
109 | MOVQ h+0(FP), DX | |
110 | MOVQ message_base+8(FP), SI | |
111 | MOVQ message_len+16(FP), DI | |
// DI = last address at which a complete 64-byte block can start.
112 | LEAQ -64(SI)(DI*1), DI | |
// Load h[0..7] and interleave/permute it into the two working registers
// (X2 and X3; see the ABEF / CDGH notes in the register usage list).
113 | MOVOU (DX), X2 | |
114 | MOVOU 16(DX), X1 | |
115 | MOVO X2, X3 | |
116 | PUNPCKLLQ X1, X2 | |
117 | PUNPCKHLQ X1, X3 | |
118 | PSHUFD $0x27, X2, X2 | |
119 | PSHUFD $0x27, X3, X3 | |
120 | MOVO SHUF_MASK<>(SB), X15 | |
121 | LEAQ K<>(SB), BX | |
122 | |
123 | JMP TEST | |
124 | |
125 | LOOP: | |
// Keep a copy of the incoming state; it is added back after the last round.
126 | MOVO X2, X12 | |
127 | MOVO X3, X13 | |
128 | |
129 | // load block and shuffle | |
130 | MOVOU (SI), X4 | |
131 | MOVOU 16(SI), X5 | |
132 | MOVOU 32(SI), X6 | |
133 | MOVOU 48(SI), X7 | |
134 | PSHUFB X15, X4 | |
135 | PSHUFB X15, X5 | |
136 | PSHUFB X15, X6 | |
137 | PSHUFB X15, X7 | |
138 | |
// ROUND456: four rounds fed by the words in X5 (X0 must already hold the
// matching group of constants), interleaved with schedule updates for
// X6 (PALIGNR + SHA256MSG2) and X4 (SHA256MSG1).
139 | #define ROUND456 \ | |
140 | PADDL X5, X0 \ | |
141 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | |
142 | MOVO X5, X1 \ | |
143 | LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4 | |
144 | PADDL X1, X6 \ | |
145 | LONG $0xf5cd380f \ // SHA256MSG2 XMM6, XMM5 | |
146 | PSHUFD $0x4e, X0, X0 \ | |
147 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | |
148 | LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 | |
149 | |
// ROUND567: same pattern fed by X6; schedule updates for X7 (SHA256MSG2)
// and X5 (SHA256MSG1).
150 | #define ROUND567 \ | |
151 | PADDL X6, X0 \ | |
152 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | |
153 | MOVO X6, X1 \ | |
154 | LONG $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4 | |
155 | PADDL X1, X7 \ | |
156 | LONG $0xfecd380f \ // SHA256MSG2 XMM7, XMM6 | |
157 | PSHUFD $0x4e, X0, X0 \ | |
158 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | |
159 | LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 | |
160 | |
// ROUND674: same pattern fed by X7; schedule updates for X4 (SHA256MSG2)
// and X6 (SHA256MSG1).
161 | #define ROUND674 \ | |
162 | PADDL X7, X0 \ | |
163 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | |
164 | MOVO X7, X1 \ | |
165 | LONG $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4 | |
166 | PADDL X1, X4 \ | |
167 | LONG $0xe7cd380f \ // SHA256MSG2 XMM4, XMM7 | |
168 | PSHUFD $0x4e, X0, X0 \ | |
169 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | |
170 | LONG $0xf7cc380f // SHA256MSG1 XMM6, XMM7 | |
171 | |
// ROUND745: same pattern fed by X4; schedule updates for X5 (SHA256MSG2)
// and X7 (SHA256MSG1).
172 | #define ROUND745 \ | |
173 | PADDL X4, X0 \ | |
174 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | |
175 | MOVO X4, X1 \ | |
176 | LONG $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4 | |
177 | PADDL X1, X5 \ | |
178 | LONG $0xeccd380f \ // SHA256MSG2 XMM5, XMM4 | |
179 | PSHUFD $0x4e, X0, X0 \ | |
180 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | |
181 | LONG $0xfccc380f // SHA256MSG1 XMM7, XMM4 | |
182 | |
183 | // rounds 0-3 | |
184 | MOVO (BX), X0 | |
185 | PADDL X4, X0 | |
186 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
187 | PSHUFD $0x4e, X0, X0 | |
188 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
189 | |
190 | // rounds 4-7 | |
191 | MOVO 1*16(BX), X0 | |
192 | PADDL X5, X0 | |
193 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
194 | PSHUFD $0x4e, X0, X0 | |
195 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
196 | LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 | |
197 | |
198 | // rounds 8-11 | |
199 | MOVO 2*16(BX), X0 | |
200 | PADDL X6, X0 | |
201 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
202 | PSHUFD $0x4e, X0, X0 | |
203 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
204 | LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 | |
205 | |
// Each line below loads the next 16-byte group of constants into X0 and
// runs the macro whose input register holds the words for those rounds.
206 | MOVO 3*16(BX), X0; ROUND674 // rounds 12-15 | |
207 | MOVO 4*16(BX), X0; ROUND745 // rounds 16-19 | |
208 | MOVO 5*16(BX), X0; ROUND456 // rounds 20-23 | |
209 | MOVO 6*16(BX), X0; ROUND567 // rounds 24-27 | |
210 | MOVO 7*16(BX), X0; ROUND674 // rounds 28-31 | |
211 | MOVO 8*16(BX), X0; ROUND745 // rounds 32-35 | |
212 | MOVO 9*16(BX), X0; ROUND456 // rounds 36-39 | |
213 | MOVO 10*16(BX), X0; ROUND567 // rounds 40-43 | |
214 | MOVO 11*16(BX), X0; ROUND674 // rounds 44-47 | |
215 | MOVO 12*16(BX), X0; ROUND745 // rounds 48-51 | |
216 | |
// The remaining rounds are written out by hand: they follow the macro
// pattern but omit the trailing SHA256MSG1 step.
217 | // rounds 52-55 | |
218 | MOVO 13*16(BX), X0 | |
219 | PADDL X5, X0 | |
220 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
221 | MOVO X5, X1 | |
222 | LONG $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4 | |
223 | PADDL X1, X6 | |
224 | LONG $0xf5cd380f // SHA256MSG2 XMM6, XMM5 | |
225 | PSHUFD $0x4e, X0, X0 | |
226 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
227 | |
228 | // rounds 56-59 | |
229 | MOVO 14*16(BX), X0 | |
230 | PADDL X6, X0 | |
231 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
232 | MOVO X6, X1 | |
233 | LONG $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4 | |
234 | PADDL X1, X7 | |
235 | LONG $0xfecd380f // SHA256MSG2 XMM7, XMM6 | |
236 | PSHUFD $0x4e, X0, X0 | |
237 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
238 | |
239 | // rounds 60-63 | |
240 | MOVO 15*16(BX), X0 | |
241 | PADDL X7, X0 | |
242 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
243 | PSHUFD $0x4e, X0, X0 | |
244 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
245 | |
// Add the state saved at the top of the loop, then advance to the next block.
246 | PADDL X12, X2 | |
247 | PADDL X13, X3 | |
248 | |
249 | ADDQ $64, SI | |
250 | |
251 | TEST: | |
// Unsigned compare: keep looping while SI <= DI, i.e. a full block remains.
252 | CMPQ SI, DI | |
253 | JBE LOOP | |
254 | |
// Rearrange X2/X3 back into two 16-byte halves and store them to h.
255 | PSHUFD $0x4e, X3, X0 | |
256 | LONG $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0 | |
257 | PSHUFD $0x4e, X2, X1 | |
258 | LONG $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f | |
259 | PSHUFD $0x1b, X0, X0 | |
260 | PSHUFD $0x1b, X1, X1 | |
261 | |
262 | MOVOU X0, (DX) | |
263 | MOVOU X1, 16(DX) | |
264 | |
265 | RET
0 | //+build !noasm | |
1 | ||
2 | package sha256 | |
3 | ||
4 | import ( | |
5 | "crypto/sha256" | |
6 | "encoding/binary" | |
7 | "testing" | |
8 | ) | |
9 | ||
10 | func sha256hash(m []byte) (r [32]byte) { | |
11 | var h [8]uint32 | |
12 | ||
13 | h[0] = 0x6a09e667 | |
14 | h[1] = 0xbb67ae85 | |
15 | h[2] = 0x3c6ef372 | |
16 | h[3] = 0xa54ff53a | |
17 | h[4] = 0x510e527f | |
18 | h[5] = 0x9b05688c | |
19 | h[6] = 0x1f83d9ab | |
20 | h[7] = 0x5be0cd19 | |
21 | ||
22 | blockSha(&h, m) | |
23 | l0 := len(m) | |
24 | l := l0 & (BlockSize - 1) | |
25 | m = m[l0-l:] | |
26 | ||
27 | var k [64]byte | |
28 | copy(k[:], m) | |
29 | ||
30 | k[l] = 0x80 | |
31 | ||
32 | if l >= 56 { | |
33 | blockSha(&h, k[:]) | |
34 | binary.LittleEndian.PutUint64(k[0:8], 0) | |
35 | binary.LittleEndian.PutUint64(k[8:16], 0) | |
36 | binary.LittleEndian.PutUint64(k[16:24], 0) | |
37 | binary.LittleEndian.PutUint64(k[24:32], 0) | |
38 | binary.LittleEndian.PutUint64(k[32:40], 0) | |
39 | binary.LittleEndian.PutUint64(k[40:48], 0) | |
40 | binary.LittleEndian.PutUint64(k[48:56], 0) | |
41 | } | |
42 | binary.BigEndian.PutUint64(k[56:64], uint64(l0)<<3) | |
43 | blockSha(&h, k[:]) | |
44 | ||
45 | binary.BigEndian.PutUint32(r[0:4], h[0]) | |
46 | binary.BigEndian.PutUint32(r[4:8], h[1]) | |
47 | binary.BigEndian.PutUint32(r[8:12], h[2]) | |
48 | binary.BigEndian.PutUint32(r[12:16], h[3]) | |
49 | binary.BigEndian.PutUint32(r[16:20], h[4]) | |
50 | binary.BigEndian.PutUint32(r[20:24], h[5]) | |
51 | binary.BigEndian.PutUint32(r[24:28], h[6]) | |
52 | binary.BigEndian.PutUint32(r[28:32], h[7]) | |
53 | ||
54 | return | |
55 | } | |
56 | ||
// runTestSha reports whether hashfunc yields the same digest as the standard
// library's crypto/sha256 for a fixed test message.
func runTestSha(hashfunc func([]byte) [32]byte) bool {
	msg := []byte("This is a message. This is a message. This is a message. This is a message.")
	return hashfunc(msg) == sha256.Sum256(msg)
}
65 | ||
66 | func TestSha0(t *testing.T) { | |
67 | if !runTestSha(Sum256) { | |
68 | t.Errorf("FAILED") | |
69 | } | |
70 | } | |
71 | ||
72 | func TestSha1(t *testing.T) { | |
73 | if sha && ssse3 && sse41 && !runTestSha(sha256hash) { | |
74 | t.Errorf("FAILED") | |
75 | } | |
76 | } |
34 | 34 | #include "textflag.h" |
35 | 35 | |
36 | 36 | #define ROTATE_XS \ |
37 | MOVOU X4, X15 \ | |
38 | MOVOU X5, X4 \ | |
39 | MOVOU X6, X5 \ | |
40 | MOVOU X7, X6 \ | |
41 | MOVOU X15, X7 | |
37 | MOVOU X4, X15 \ | |
38 | MOVOU X5, X4 \ | |
39 | MOVOU X6, X5 \ | |
40 | MOVOU X7, X6 \ | |
41 | MOVOU X15, X7 | |
42 | 42 | |
43 | 43 | // compute s0 four at a time and s1 two at a time |
44 | 44 | // compute W[-16] + W[-7] 4 at a time |
45 | 45 | #define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h) \ |
46 | MOVL e, R13 \ /* y0 = e */ | |
47 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
48 | MOVL a, R14 \ /* y1 = a */ | |
49 | MOVOU X7, X0 \ | |
50 | LONG $0x0f3a0f66; WORD $0x04c6 \ // PALIGNR XMM0,XMM6,0x4 /* XTMP0 = W[-7] */ | |
51 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
52 | XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
53 | MOVL f, R15 \ /* y2 = f */ | |
54 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
55 | XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ | |
56 | XORL g, R15 \ /* y2 = f^g */ | |
57 | LONG $0xc4fe0f66 \ // PADDD XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ | |
58 | XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) */ | |
59 | ANDL e, R15 \ /* y2 = (f^g)&e */ | |
60 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
61 | \ /* */ | |
62 | \ /* compute s0 */ | |
63 | \ /* */ | |
64 | MOVOU X5, X1 \ | |
65 | LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1,XMM4,0x4 /* XTMP1 = W[-15] */ | |
66 | XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
67 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ | |
68 | XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
69 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
70 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
71 | ADDL _xfer+48(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
72 | MOVL a, R13 \ /* y0 = a */ | |
73 | ADDL R15, h \ /* h = h + S1 + CH + k + w */ | |
74 | \ /* ROTATE_ARGS */ | |
75 | MOVL a, R15 \ /* y2 = a */ | |
76 | MOVOU X1, X2 \ | |
77 | LONG $0xd2720f66; BYTE $0x07 \ // PSRLD XMM2,0x7 /* */ | |
78 | ORL c, R13 \ /* y0 = a|c */ | |
79 | ADDL h, d \ /* d = d + h + S1 + CH + k + w */ | |
80 | ANDL c, R15 \ /* y2 = a&c */ | |
81 | MOVOU X1, X3 \ | |
82 | LONG $0xf3720f66; BYTE $0x19 \ // PSLLD XMM3,0x19 /* */ | |
83 | ANDL b, R13 \ /* y0 = (a|c)&b */ | |
84 | ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ | |
85 | LONG $0xdaeb0f66 \ // POR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ | |
86 | ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ | |
87 | ADDL R13, h \ /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
88 | \ /* ROTATE_ARGS */ | |
89 | MOVL d, R13 \ /* y0 = e */ | |
90 | MOVL h, R14 \ /* y1 = a */ | |
91 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
92 | XORL d, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
93 | MOVL e, R15 \ /* y2 = f */ | |
94 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
95 | MOVOU X1, X2 \ | |
96 | LONG $0xd2720f66; BYTE $0x12 \ // PSRLD XMM2,0x12 /* */ | |
97 | XORL h, R14 \ /* y1 = a ^ (a >> (22-13) */ | |
98 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
99 | XORL f, R15 \ /* y2 = f^g */ | |
100 | MOVOU X1, X8 \ | |
101 | LONG $0x720f4166; WORD $0x03d0 \ // PSRLD XMM8,0x3 /* XTMP4 = W[-15] >> 3 */ | |
102 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
103 | XORL d, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
104 | ANDL d, R15 \ /* y2 = (f^g)&e */ | |
105 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ | |
106 | LONG $0xf1720f66; BYTE $0x0e \ // PSLLD XMM1,0xe /* */ | |
107 | XORL h, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
108 | XORL f, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
109 | LONG $0xd9ef0f66 \ // PXOR XMM3,XMM1 /* */ | |
110 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
111 | ADDL _xfer+52(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
112 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
113 | LONG $0xdaef0f66 \ // PXOR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */ | |
114 | MOVL h, R13 \ /* y0 = a */ | |
115 | ADDL R15, g \ /* h = h + S1 + CH + k + w */ | |
116 | MOVL h, R15 \ /* y2 = a */ | |
117 | MOVOU X3, X1 \ | |
118 | LONG $0xef0f4166; BYTE $0xc8 \ // PXOR XMM1,XMM8 /* XTMP1 = s0 */ | |
119 | ORL b, R13 \ /* y0 = a|c */ | |
120 | ADDL g, c \ /* d = d + h + S1 + CH + k + w */ | |
121 | ANDL b, R15 \ /* y2 = a&c */ | |
122 | \ /* */ | |
123 | \ /* compute low s1 */ | |
124 | \ /* */ | |
125 | LONG $0xd7700f66; BYTE $0xfa \ // PSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ | |
126 | ANDL a, R13 \ /* y0 = (a|c)&b */ | |
127 | ADDL R14, g \ /* h = h + S1 + CH + k + w + S0 */ | |
128 | LONG $0xc1fe0f66 \ // PADDD XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ | |
129 | ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ | |
130 | ADDL R13, g \ /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
131 | \ /* ROTATE_ARGS */ | |
132 | MOVL c, R13 \ /* y0 = e */ | |
133 | MOVL g, R14 \ /* y1 = a */ | |
134 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
135 | XORL c, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
136 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
137 | MOVL d, R15 \ /* y2 = f */ | |
138 | XORL g, R14 \ /* y1 = a ^ (a >> (22-13) */ | |
139 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
140 | MOVOU X2, X8 \ | |
141 | LONG $0x720f4166; WORD $0x0ad0 \ // PSRLD XMM8,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ | |
142 | XORL e, R15 \ /* y2 = f^g */ | |
143 | MOVOU X2, X3 \ | |
144 | LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ | |
145 | XORL c, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
146 | ANDL c, R15 \ /* y2 = (f^g)&e */ | |
147 | LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ | |
148 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
149 | XORL g, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
150 | XORL e, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
151 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ | |
152 | LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ | |
153 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
154 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
155 | ADDL _xfer+56(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
156 | LONG $0xef0f4466; BYTE $0xc2 \ // PXOR XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ | |
157 | MOVL g, R13 \ /* y0 = a */ | |
158 | ADDL R15, f \ /* h = h + S1 + CH + k + w */ | |
159 | MOVL g, R15 \ /* y2 = a */ | |
160 | LONG $0x380f4566; WORD $0xc200 \ // PSHUFB XMM8,XMM10 /* XTMP4 = s1 {00BA} */ | |
161 | ORL a, R13 \ /* y0 = a|c */ | |
162 | ADDL f, b \ /* d = d + h + S1 + CH + k + w */ | |
163 | ANDL a, R15 \ /* y2 = a&c */ | |
164 | LONG $0xfe0f4166; BYTE $0xc0 \ // PADDD XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ | |
165 | ANDL h, R13 \ /* y0 = (a|c)&b */ | |
166 | ADDL R14, f \ /* h = h + S1 + CH + k + w + S0 */ | |
167 | \ /* */ | |
168 | \ /* compute high s1 */ | |
169 | \ /* */ | |
170 | LONG $0xd0700f66; BYTE $0x50 \ // PSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ | |
171 | ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ | |
172 | ADDL R13, f \ /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
173 | \ /* ROTATE_ARGS */ | |
174 | MOVL b, R13 \ /* y0 = e */ | |
175 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
176 | MOVL f, R14 \ /* y1 = a */ | |
177 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
178 | XORL b, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
179 | MOVL c, R15 \ /* y2 = f */ | |
180 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
181 | MOVOU X2, X11 \ | |
182 | LONG $0x720f4166; WORD $0x0ad3 \ // PSRLD XMM11,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ | |
183 | XORL f, R14 \ /* y1 = a ^ (a >> (22-13) */ | |
184 | XORL d, R15 \ /* y2 = f^g */ | |
185 | MOVOU X2, X3 \ | |
186 | LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ | |
187 | XORL b, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
188 | ANDL b, R15 \ /* y2 = (f^g)&e */ | |
189 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
190 | LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ | |
191 | XORL f, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
192 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ | |
193 | XORL d, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
194 | LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ | |
195 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
196 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
197 | ADDL _xfer+60(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
198 | LONG $0xef0f4466; BYTE $0xda \ // PXOR XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ | |
199 | MOVL f, R13 \ /* y0 = a */ | |
200 | ADDL R15, e \ /* h = h + S1 + CH + k + w */ | |
201 | MOVL f, R15 \ /* y2 = a */ | |
202 | LONG $0x380f4566; WORD $0xdc00 \ // PSHUFB XMM11,XMM12 /* XTMP5 = s1 {DC00} */ | |
203 | ORL h, R13 \ /* y0 = a|c */ | |
204 | ADDL e, a \ /* d = d + h + S1 + CH + k + w */ | |
205 | ANDL h, R15 \ /* y2 = a&c */ | |
206 | MOVOU X11, X4 \ | |
207 | LONG $0xe0fe0f66 \ // PADDD XMM4,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ | |
208 | ANDL g, R13 \ /* y0 = (a|c)&b */ | |
209 | ADDL R14, e \ /* h = h + S1 + CH + k + w + S0 */ | |
210 | ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ | |
211 | ADDL R13, e \ /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
212 | \ /* ROTATE_ARGS */ | |
213 | ROTATE_XS | |
214 | ||
46 | MOVL e, R13 \ // y0 = e | |
47 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
48 | MOVL a, R14 \ // y1 = a | |
49 | MOVOU X7, X0 \ | |
50 | LONG $0x0f3a0f66; WORD $0x04c6 \ // PALIGNR XMM0,XMM6,0x4 /* XTMP0 = W[-7] */ | |
51 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
52 | XORL e, R13 \ // y0 = e ^ (e >> (25-11)) | |
53 | MOVL f, R15 \ // y2 = f | |
54 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
55 | XORL a, R14 \ // y1 = a ^ (a >> (22-13) | |
56 | XORL g, R15 \ // y2 = f^g | |
57 | LONG $0xc4fe0f66 \ // PADDD XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ | |
58 | XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) | |
59 | ANDL e, R15 \ // y2 = (f^g)&e | |
60 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
61 | \ | |
62 | \ // compute s0 | |
63 | \ | |
64 | MOVOU X5, X1 \ | |
65 | LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1,XMM4,0x4 /* XTMP1 = W[-15] */ | |
66 | XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
67 | ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | |
68 | XORL g, R15 \ // y2 = CH = ((f^g)&e)^g | |
69 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
70 | ADDL R13, R15 \ // y2 = S1 + CH | |
71 | ADDL _xfer+48(FP), R15 \ // y2 = k + w + S1 + CH | |
72 | MOVL a, R13 \ // y0 = a | |
73 | ADDL R15, h \ // h = h + S1 + CH + k + w | |
74 | \ // ROTATE_ARGS | |
75 | MOVL a, R15 \ // y2 = a | |
76 | MOVOU X1, X2 \ | |
77 | LONG $0xd2720f66; BYTE $0x07 \ // PSRLD XMM2,0x7 /* */ | |
78 | ORL c, R13 \ // y0 = a|c | |
79 | ADDL h, d \ // d = d + h + S1 + CH + k + w | |
80 | ANDL c, R15 \ // y2 = a&c | |
81 | MOVOU X1, X3 \ | |
82 | LONG $0xf3720f66; BYTE $0x19 \ // PSLLD XMM3,0x19 /* */ | |
83 | ANDL b, R13 \ // y0 = (a|c)&b | |
84 | ADDL R14, h \ // h = h + S1 + CH + k + w + S0 | |
85 | LONG $0xdaeb0f66 \ // POR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ | |
86 | ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) | |
87 | ADDL R13, h \ // h = h + S1 + CH + k + w + S0 + MAJ | |
88 | \ // ROTATE_ARGS | |
89 | MOVL d, R13 \ // y0 = e | |
90 | MOVL h, R14 \ // y1 = a | |
91 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
92 | XORL d, R13 \ // y0 = e ^ (e >> (25-11)) | |
93 | MOVL e, R15 \ // y2 = f | |
94 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
95 | MOVOU X1, X2 \ | |
96 | LONG $0xd2720f66; BYTE $0x12 \ // PSRLD XMM2,0x12 /* */ | |
97 | XORL h, R14 \ // y1 = a ^ (a >> (22-13) | |
98 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
99 | XORL f, R15 \ // y2 = f^g | |
100 | MOVOU X1, X8 \ | |
101 | LONG $0x720f4166; WORD $0x03d0 \ // PSRLD XMM8,0x3 /* XTMP4 = W[-15] >> 3 */ | |
102 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
103 | XORL d, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
104 | ANDL d, R15 \ // y2 = (f^g)&e | |
105 | ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | |
106 | LONG $0xf1720f66; BYTE $0x0e \ // PSLLD XMM1,0xe /* */ | |
107 | XORL h, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
108 | XORL f, R15 \ // y2 = CH = ((f^g)&e)^g | |
109 | LONG $0xd9ef0f66 \ // PXOR XMM3,XMM1 /* */ | |
110 | ADDL R13, R15 \ // y2 = S1 + CH | |
111 | ADDL _xfer+52(FP), R15 \ // y2 = k + w + S1 + CH | |
112 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
113 | LONG $0xdaef0f66 \ // PXOR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */ | |
114 | MOVL h, R13 \ // y0 = a | |
115 | ADDL R15, g \ // h = h + S1 + CH + k + w | |
116 | MOVL h, R15 \ // y2 = a | |
117 | MOVOU X3, X1 \ | |
118 | LONG $0xef0f4166; BYTE $0xc8 \ // PXOR XMM1,XMM8 /* XTMP1 = s0 */ | |
119 | ORL b, R13 \ // y0 = a|c | |
120 | ADDL g, c \ // d = d + h + S1 + CH + k + w | |
121 | ANDL b, R15 \ // y2 = a&c | |
122 | \ | |
123 | \ // compute low s1 | |
124 | \ | |
125 | LONG $0xd7700f66; BYTE $0xfa \ // PSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ | |
126 | ANDL a, R13 \ // y0 = (a|c)&b | |
127 | ADDL R14, g \ // h = h + S1 + CH + k + w + S0 | |
128 | LONG $0xc1fe0f66 \ // PADDD XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ | |
129 | ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) | |
130 | ADDL R13, g \ // h = h + S1 + CH + k + w + S0 + MAJ | |
131 | \ // ROTATE_ARGS | |
132 | MOVL c, R13 \ // y0 = e | |
133 | MOVL g, R14 \ // y1 = a | |
134 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
135 | XORL c, R13 \ // y0 = e ^ (e >> (25-11)) | |
136 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
137 | MOVL d, R15 \ // y2 = f | |
138 | XORL g, R14 \ // y1 = a ^ (a >> (22-13) | |
139 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
140 | MOVOU X2, X8 \ | |
141 | LONG $0x720f4166; WORD $0x0ad0 \ // PSRLD XMM8,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ | |
142 | XORL e, R15 \ // y2 = f^g | |
143 | MOVOU X2, X3 \ | |
144 | LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ | |
145 | XORL c, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
146 | ANDL c, R15 \ // y2 = (f^g)&e | |
147 | LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ | |
148 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
149 | XORL g, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
150 | XORL e, R15 \ // y2 = CH = ((f^g)&e)^g | |
151 | ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | |
152 | LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ | |
153 | ADDL R13, R15 \ // y2 = S1 + CH | |
154 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
155 | ADDL _xfer+56(FP), R15 \ // y2 = k + w + S1 + CH | |
156 | LONG $0xef0f4466; BYTE $0xc2 \ // PXOR XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ | |
157 | MOVL g, R13 \ // y0 = a | |
158 | ADDL R15, f \ // h = h + S1 + CH + k + w | |
159 | MOVL g, R15 \ // y2 = a | |
160 | LONG $0x380f4566; WORD $0xc200 \ // PSHUFB XMM8,XMM10 /* XTMP4 = s1 {00BA} */ | |
161 | ORL a, R13 \ // y0 = a|c | |
162 | ADDL f, b \ // d = d + h + S1 + CH + k + w | |
163 | ANDL a, R15 \ // y2 = a&c | |
164 | LONG $0xfe0f4166; BYTE $0xc0 \ // PADDD XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ | |
165 | ANDL h, R13 \ // y0 = (a|c)&b | |
166 | ADDL R14, f \ // h = h + S1 + CH + k + w + S0 | |
167 | \ | |
168 | \ // compute high s1 | |
169 | \ | |
170 | LONG $0xd0700f66; BYTE $0x50 \ // PSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ | |
171 | ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) | |
172 | ADDL R13, f \ // h = h + S1 + CH + k + w + S0 + MAJ | |
173 | \ // ROTATE_ARGS | |
174 | MOVL b, R13 \ // y0 = e | |
175 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
176 | MOVL f, R14 \ // y1 = a | |
177 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
178 | XORL b, R13 \ // y0 = e ^ (e >> (25-11)) | |
179 | MOVL c, R15 \ // y2 = f | |
180 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
181 | MOVOU X2, X11 \ | |
182 | LONG $0x720f4166; WORD $0x0ad3 \ // PSRLD XMM11,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ | |
183 | XORL f, R14 \ // y1 = a ^ (a >> (22-13) | |
184 | XORL d, R15 \ // y2 = f^g | |
185 | MOVOU X2, X3 \ | |
186 | LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ | |
187 | XORL b, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
188 | ANDL b, R15 \ // y2 = (f^g)&e | |
189 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
190 | LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ | |
191 | XORL f, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
192 | ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | |
193 | XORL d, R15 \ // y2 = CH = ((f^g)&e)^g | |
194 | LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ | |
195 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
196 | ADDL R13, R15 \ // y2 = S1 + CH | |
197 | ADDL _xfer+60(FP), R15 \ // y2 = k + w + S1 + CH | |
198 | LONG $0xef0f4466; BYTE $0xda \ // PXOR XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ | |
199 | MOVL f, R13 \ // y0 = a | |
200 | ADDL R15, e \ // h = h + S1 + CH + k + w | |
201 | MOVL f, R15 \ // y2 = a | |
202 | LONG $0x380f4566; WORD $0xdc00 \ // PSHUFB XMM11,XMM12 /* XTMP5 = s1 {DC00} */ | |
203 | ORL h, R13 \ // y0 = a|c | |
204 | ADDL e, a \ // d = d + h + S1 + CH + k + w | |
205 | ANDL h, R15 \ // y2 = a&c | |
206 | MOVOU X11, X4 \ | |
207 | LONG $0xe0fe0f66 \ // PADDD XMM4,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ | |
208 | ANDL g, R13 \ // y0 = (a|c)&b | |
209 | ADDL R14, e \ // h = h + S1 + CH + k + w + S0 | |
210 | ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) | |
211 | ADDL R13, e \ // h = h + S1 + CH + k + w + S0 + MAJ | |
212 | \ // ROTATE_ARGS | |
213 | ROTATE_XS | |
215 | 214 | |
// DO_ROUND performs a single SHA-256 compression round with no message
// scheduling.
//
//   a..h    registers holding the eight working variables for this round
//   offset  byte offset from _xfer(FP) of the 32-bit word that already holds
//           k + w for this round
//
// Only d and h are written among the working variables (d += T1, h = T1 + T2);
// callers rotate the register arguments between invocations instead of moving
// data. R13 (y0), R14 (y1) and R15 (y2) are clobbered as scratch.
//
// In the per-line comments ">>" denotes a 32-bit rotate right; it is
// implemented with ROLL by (32 - n), e.g. ROLL $18 is a rotate right by 14.
216 | 215 | #define DO_ROUND(a, b, c, d, e, f, g, h, offset) \ |
217 | MOVL e, R13 \ /* y0 = e */ | |
218 | ROLL $18, R13 \ /* y0 = e >> (25-11) */ | |
219 | MOVL a, R14 \ /* y1 = a */ | |
220 | XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ | |
221 | ROLL $23, R14 \ /* y1 = a >> (22-13) */ | |
222 | MOVL f, R15 \ /* y2 = f */ | |
223 | XORL a, R14 \ /* y1 = a ^ (a >> (22-13)) */ | |
224 | ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ | |
225 | XORL g, R15 \ /* y2 = f^g */ | |
226 | XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ | |
227 | ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ | |
228 | ANDL e, R15 \ /* y2 = (f^g)&e */ | |
229 | XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ | |
230 | ROLL $26, R13 \ /* y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) */ | |
231 | XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ | |
232 | ADDL R13, R15 \ /* y2 = S1 + CH */ | |
233 | ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ | |
234 | ADDL _xfer+offset(FP), R15 \ /* y2 = k + w + S1 + CH */ | |
235 | MOVL a, R13 \ /* y0 = a */ | |
236 | ADDL R15, h \ /* h = h + S1 + CH + k + w */ | |
237 | MOVL a, R15 \ /* y2 = a */ | |
238 | ORL c, R13 \ /* y0 = a|c */ | |
239 | ADDL h, d \ /* d = d + h + S1 + CH + k + w */ | |
240 | ANDL c, R15 \ /* y2 = a&c */ | |
241 | ANDL b, R13 \ /* y0 = (a|c)&b */ | |
242 | ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ | |
243 | ORL R15, R13 \ /* y0 = MAJ = ((a|c)&b)|(a&c) */ | |
244 | ADDL R13, h /* h = h + S1 + CH + k + w + S0 + MAJ */ | |
245 | |
216 | MOVL e, R13 \ // y0 = e | |
217 | ROLL $18, R13 \ // y0 = e >> (25-11) | |
218 | MOVL a, R14 \ // y1 = a | |
219 | XORL e, R13 \ // y0 = e ^ (e >> (25-11)) | |
220 | ROLL $23, R14 \ // y1 = a >> (22-13) | |
221 | MOVL f, R15 \ // y2 = f | |
222 | XORL a, R14 \ // y1 = a ^ (a >> (22-13)) | |
223 | ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) | |
224 | XORL g, R15 \ // y2 = f^g | |
225 | XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | |
226 | ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) | |
227 | ANDL e, R15 \ // y2 = (f^g)&e | |
228 | XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | |
229 | ROLL $26, R13 \ // y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) | |
230 | XORL g, R15 \ // y2 = CH = ((f^g)&e)^g | |
231 | ADDL R13, R15 \ // y2 = S1 + CH | |
232 | ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | |
233 | ADDL _xfer+offset(FP), R15 \ // y2 = k + w + S1 + CH | |
234 | MOVL a, R13 \ // y0 = a | |
235 | ADDL R15, h \ // h = h + S1 + CH + k + w | |
236 | MOVL a, R15 \ // y2 = a | |
237 | ORL c, R13 \ // y0 = a|c | |
238 | ADDL h, d \ // d = d + h + S1 + CH + k + w | |
239 | ANDL c, R15 \ // y2 = a&c | |
240 | ANDL b, R13 \ // y0 = (a|c)&b | |
241 | ADDL R14, h \ // h = h + S1 + CH + k + w + S0 | |
242 | ORL R15, R13 \ // y0 = MAJ = ((a|c)&b)|(a&c) | |
243 | ADDL R13, h // h = h + S1 + CH + k + w + S0 + MAJ | |
246 | 244 | |
// blockSsse updates the eight 32-bit SHA-256 state words at h by compressing
// message in 64-byte blocks.
//
// Behaviour visible in the code below:
//   - A zero-length message returns immediately without touching h.
//   - The block loop advances the input pointer by 64 and exits only when it
//     equals message+len exactly (JNE), so len must be a multiple of 64.
//   - The frame size is $0: the reserved0..reserved3 argument slots are used
//     as scratch memory. _xfer+48(FP) (16 bytes, reserved0/reserved1) holds the
//     four k+w words for the current group of rounds, _inp_end+64(FP)
//     (reserved2) holds the end-of-message pointer and _inp+72(FP) (reserved3)
//     holds the current block pointer.
//   - Each block runs 64 rounds: loop1 executes 3 x 16 rounds that also extend
//     the message schedule (FOUR_ROUNDS_AND_SCHED), then loop2 executes
//     2 x 8 plain rounds (DO_ROUND) on the last 16 scheduled words.
//   - X13 holds the byte-flip mask applied to the input with PSHUFB; X10 and
//     X12 hold the shuffle masks consumed by the scheduling macro.
//
// NOTE(review): loop1 always copies X4 into X9 before each macro call; this
// presumably relies on ROTATE_XS (invoked at the end of
// FOUR_ROUNDS_AND_SCHED) rotating X4..X7 - confirm against its definition.
247 | 245 | // func blockSsse(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64) |
248 | 246 | TEXT ·blockSsse(SB), 7, $0 |
249 | 247 |
250 | MOVQ h+0(FP), SI // SI: &h | |
251 | MOVQ message+24(FP), R8 // &message | |
252 | MOVQ lenmessage+32(FP), R9 // length of message | |
253 | CMPQ R9, $0 | |
254 | JEQ done_hash | |
255 | ADDQ R8, R9 | |
256 | MOVQ R9, _inp_end+64(FP) // store end of message | |
257 | |
258 | // Register definition | |
259 | // a --> eax | |
260 | // b --> ebx | |
261 | // c --> ecx | |
262 | // d --> r8d | |
263 | // e --> edx | |
264 | // f --> r9d | |
265 | // g --> r10d | |
266 | // h --> r11d | |
267 | // | |
268 | // y0 --> r13d | |
269 | // y1 --> r14d | |
270 | // y2 --> r15d | |
271 | |
272 | MOVL (0*4)(SI), AX // a = H0 | |
273 | MOVL (1*4)(SI), BX // b = H1 | |
274 | MOVL (2*4)(SI), CX // c = H2 | |
275 | MOVL (3*4)(SI), R8 // d = H3 | |
276 | MOVL (4*4)(SI), DX // e = H4 | |
277 | MOVL (5*4)(SI), R9 // f = H5 | |
278 | MOVL (6*4)(SI), R10 // g = H6 | |
279 | MOVL (7*4)(SI), R11 // h = H7 | |
248 | MOVQ h+0(FP), SI // SI: &h | |
249 | MOVQ message+24(FP), R8 // &message | |
250 | MOVQ lenmessage+32(FP), R9 // length of message | |
251 | CMPQ R9, $0 | |
252 | JEQ done_hash | |
253 | ADDQ R8, R9 | |
254 | MOVQ R9, _inp_end+64(FP) // store end of message | |
255 | |
256 | // Register definition | |
257 | // a --> eax | |
258 | // b --> ebx | |
259 | // c --> ecx | |
260 | // d --> r8d | |
261 | // e --> edx | |
262 | // f --> r9d | |
263 | // g --> r10d | |
264 | // h --> r11d | |
265 | // | |
266 | // y0 --> r13d | |
267 | // y1 --> r14d | |
268 | // y2 --> r15d | |
269 | |
270 | MOVL (0*4)(SI), AX // a = H0 | |
271 | MOVL (1*4)(SI), BX // b = H1 | |
272 | MOVL (2*4)(SI), CX // c = H2 | |
273 | MOVL (3*4)(SI), R8 // d = H3 | |
274 | MOVL (4*4)(SI), DX // e = H4 | |
275 | MOVL (5*4)(SI), R9 // f = H5 | |
276 | MOVL (6*4)(SI), R10 // g = H6 | |
277 | MOVL (7*4)(SI), R11 // h = H7 | |
280 | 278 |
281 | 279 | MOVOU bflipMask<>(SB), X13 |
282 | MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA | |
283 | MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 | |
284 | |
285 | MOVQ message+24(FP), SI // SI: &message | |
280 | MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA | |
281 | MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 | |
282 | |
283 | MOVQ message+24(FP), SI // SI: &message | |
286 | 284 |
287 | 285 | loop0: |
288 | 286 | LEAQ constants<>(SB), BP |
289 | 287 |
290 | 288 | // byte swap first 16 dwords |
291 | MOVOU 0*16(SI), X4 | |
292 | LONG $0x380f4166; WORD $0xe500 // PSHUFB XMM4, XMM13 | |
293 | MOVOU 1*16(SI), X5 | |
294 | LONG $0x380f4166; WORD $0xed00 // PSHUFB XMM5, XMM13 | |
295 | MOVOU 2*16(SI), X6 | |
296 | LONG $0x380f4166; WORD $0xf500 // PSHUFB XMM6, XMM13 | |
297 | MOVOU 3*16(SI), X7 | |
298 | LONG $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13 | |
299 | |
300 | MOVQ SI, _inp+72(FP) | |
301 | MOVD $0x3, DI | |
302 | |
303 | // Align | |
304 | // nop WORD PTR [rax+rax*1+0x0] | |
289 | MOVOU 0*16(SI), X4 | |
290 | LONG $0x380f4166; WORD $0xe500 // PSHUFB XMM4, XMM13 | |
291 | MOVOU 1*16(SI), X5 | |
292 | LONG $0x380f4166; WORD $0xed00 // PSHUFB XMM5, XMM13 | |
293 | MOVOU 2*16(SI), X6 | |
294 | LONG $0x380f4166; WORD $0xf500 // PSHUFB XMM6, XMM13 | |
295 | MOVOU 3*16(SI), X7 | |
296 | LONG $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13 | |
297 | |
298 | MOVQ SI, _inp+72(FP) | |
299 | MOVD $0x3, DI | |
300 | |
301 | // Align | |
302 | // nop WORD PTR [rax+rax*1+0x0] | |
305 | 303 |
306 | 304 | // schedule 48 input dwords, by doing 3 rounds of 16 each |
307 | 305 | loop1: |
308 | MOVOU X4, X9 | |
309 | LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ | |
310 | MOVOU X9, _xfer+48(FP) | |
311 | FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) | |
312 | |
313 | MOVOU X4, X9 | |
314 | LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ | |
315 | MOVOU X9, _xfer+48(FP) | |
316 | FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) | |
317 | |
318 | MOVOU X4, X9 | |
319 | LONG $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP] /* Add 3rd constant to message */ | |
320 | MOVOU X9, _xfer+48(FP) | |
321 | FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) | |
322 | |
323 | MOVOU X4, X9 | |
324 | LONG $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP] /* Add 4th constant to message */ | |
325 | MOVOU X9, _xfer+48(FP) | |
326 | ADDQ $64, BP | |
327 | FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) | |
328 | |
329 | SUBQ $1, DI | |
330 | JNE loop1 | |
331 | |
332 | MOVD $0x2, DI | |
306 | MOVOU X4, X9 | |
307 | LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ | |
308 | MOVOU X9, _xfer+48(FP) | |
309 | FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) | |
310 | |
311 | MOVOU X4, X9 | |
312 | LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ | |
313 | MOVOU X9, _xfer+48(FP) | |
314 | FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) | |
315 | |
316 | MOVOU X4, X9 | |
317 | LONG $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP] /* Add 3rd constant to message */ | |
318 | MOVOU X9, _xfer+48(FP) | |
319 | FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) | |
320 | |
321 | MOVOU X4, X9 | |
322 | LONG $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP] /* Add 4th constant to message */ | |
323 | MOVOU X9, _xfer+48(FP) | |
324 | ADDQ $64, BP | |
325 | FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) | |
326 | |
327 | SUBQ $1, DI | |
328 | JNE loop1 | |
329 | |
330 | MOVD $0x2, DI | |
331 | |
333 | 332 | loop2: |
334 | MOVOU X4, X9 | |
335 | LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ | |
336 | MOVOU X9, _xfer+48(FP) | |
337 | DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) | |
338 | DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) | |
339 | DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) | |
340 | DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) | |
341 | |
342 | MOVOU X5, X9 | |
343 | LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ | |
344 | MOVOU X9, _xfer+48(FP) | |
345 | ADDQ $32, BP | |
346 | DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) | |
347 | DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) | |
348 | DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) | |
349 | DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) | |
350 | |
351 | MOVOU X6, X4 | |
352 | MOVOU X7, X5 | |
353 | |
354 | SUBQ $1, DI | |
355 | JNE loop2 | |
356 | |
357 | MOVQ h+0(FP), SI // SI: &h | |
358 | ADDL (0*4)(SI), AX // H0 = a + H0 | |
359 | MOVL AX, (0*4)(SI) | |
360 | ADDL (1*4)(SI), BX // H1 = b + H1 | |
361 | MOVL BX, (1*4)(SI) | |
362 | ADDL (2*4)(SI), CX // H2 = c + H2 | |
363 | MOVL CX, (2*4)(SI) | |
364 | ADDL (3*4)(SI), R8 // H3 = d + H3 | |
365 | MOVL R8, (3*4)(SI) | |
366 | ADDL (4*4)(SI), DX // H4 = e + H4 | |
367 | MOVL DX, (4*4)(SI) | |
368 | ADDL (5*4)(SI), R9 // H5 = f + H5 | |
369 | MOVL R9, (5*4)(SI) | |
370 | ADDL (6*4)(SI), R10 // H6 = g + H6 | |
371 | MOVL R10, (6*4)(SI) | |
372 | ADDL (7*4)(SI), R11 // H7 = h + H7 | |
373 | MOVL R11, (7*4)(SI) | |
374 | |
375 | MOVQ _inp+72(FP), SI | |
333 | MOVOU X4, X9 | |
334 | LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ | |
335 | MOVOU X9, _xfer+48(FP) | |
336 | DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) | |
337 | DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) | |
338 | DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) | |
339 | DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) | |
340 | |
341 | MOVOU X5, X9 | |
342 | LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ | |
343 | MOVOU X9, _xfer+48(FP) | |
344 | ADDQ $32, BP | |
345 | DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) | |
346 | DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) | |
347 | DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) | |
348 | DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) | |
349 | |
350 | MOVOU X6, X4 | |
351 | MOVOU X7, X5 | |
352 | |
353 | SUBQ $1, DI | |
354 | JNE loop2 | |
355 | |
356 | MOVQ h+0(FP), SI // SI: &h | |
357 | ADDL (0*4)(SI), AX // H0 = a + H0 | |
358 | MOVL AX, (0*4)(SI) | |
359 | ADDL (1*4)(SI), BX // H1 = b + H1 | |
360 | MOVL BX, (1*4)(SI) | |
361 | ADDL (2*4)(SI), CX // H2 = c + H2 | |
362 | MOVL CX, (2*4)(SI) | |
363 | ADDL (3*4)(SI), R8 // H3 = d + H3 | |
364 | MOVL R8, (3*4)(SI) | |
365 | ADDL (4*4)(SI), DX // H4 = e + H4 | |
366 | MOVL DX, (4*4)(SI) | |
367 | ADDL (5*4)(SI), R9 // H5 = f + H5 | |
368 | MOVL R9, (5*4)(SI) | |
369 | ADDL (6*4)(SI), R10 // H6 = g + H6 | |
370 | MOVL R10, (6*4)(SI) | |
371 | ADDL (7*4)(SI), R11 // H7 = h + H7 | |
372 | MOVL R11, (7*4)(SI) | |
373 | |
374 | MOVQ _inp+72(FP), SI | |
376 | 375 | ADDQ $64, SI |
377 | 376 | CMPQ _inp_end+64(FP), SI |
378 | JNE loop0 | |
377 | JNE loop0 | |
379 | 378 |
380 | 379 | done_hash: |
381 | RET | |
380 | RET | |
382 | 381 | |
383 | 382 | // Constants table |
384 | 383 | DATA constants<>+0x0(SB)/8, $0x71374491428a2f98 |
// The functions below have empty bodies: calling them leaves dig and p
// untouched.
// NOTE(review): presumably placeholders so the package compiles where the
// matching assembly implementation is unavailable - confirm against this
// file's build constraints.
21 | 21 | func blockAvx2Go(dig *digest, p []byte) {} |
22 | 22 | func blockAvxGo(dig *digest, p []byte) {} |
23 | 23 | func blockSsseGo(dig *digest, p []byte) {} |
24 | func blockShaGo(dig *digest, p []byte) {} |
45 | 45 | |
46 | 46 | dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] |
47 | 47 | } |
48 | ||
// blockShaGo forwards the digest state (&dig.h) and the input bytes p to
// blockSha.
// NOTE(review): blockSha is not visible here; presumably the SHA-extension
// assembly routine that updates dig.h in place - confirm.
49 | func blockShaGo(dig *digest, p []byte) { | |
50 | | |
51 | blockSha(&dig.h, p) | |
52 | } |
// The functions below have empty bodies: calling them leaves dig and p
// untouched.
// NOTE(review): presumably placeholders for block implementations that do
// not exist in this build configuration - confirm against this file's build
// constraints.
20 | 20 | func blockAvx2Go(dig *digest, p []byte) {} |
21 | 21 | func blockAvxGo(dig *digest, p []byte) {} |
22 | 22 | func blockSsseGo(dig *digest, p []byte) {} |
23 | func blockShaGo(dig *digest, p []byte) {} | |
23 | 24 | func blockArmGo(dig *digest, p []byte) {} |
// The functions below have empty bodies: calling them leaves dig and p
// untouched.
// NOTE(review): presumably placeholders for the x86 block implementations
// in a build that does not provide them - confirm against this file's build
// constraints.
20 | 20 | func blockAvx2Go(dig *digest, p []byte) {} |
21 | 21 | func blockAvxGo(dig *digest, p []byte) {} |
22 | 22 | func blockSsseGo(dig *digest, p []byte) {} |
23 | func blockShaGo(dig *digest, p []byte) {} | |
23 | 24 | |
// blockArm is declared without a Go body, so its implementation is supplied
// outside this file. It takes the hash state words h and the input bytes
// message; go:noescape tells the compiler that neither slice escapes through
// the call.
// NOTE(review): presumably the ARM SHA-256 assembly routine that updates h in
// place - confirm against the .s file.
24 | 25 | //go:noescape |
25 | 26 | func blockArm(h []uint32, message []uint8) |