diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c56069f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.test \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 545066e..4f85db5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,18 +4,22 @@ os: - linux -- osx - -osx_image: xcode7.2 go: -- 1.6 -- 1.5 +- tip +- 1.12.x env: - ARCH=x86_64 - ARCH=i686 +matrix: + fast_finish: true + allow_failures: + - go: tip + script: - diff -au <(gofmt -d .) <(printf "") - go test -race -v ./... +- go vet -asmdecl . +- ./test-architectures.sh diff --git a/README.md b/README.md index 9501601..5282d83 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # sha256-simd -Accelerate SHA256 computations in pure Go using AVX512 and AVX2 for Intel and ARM64 for ARM. On AVX512 it provides an up to 8x improvement (over 3 GB/s per core) in comparison to AVX2. +Accelerate SHA256 computations in pure Go using AVX512, SHA Extensions and AVX2 for Intel and ARM64 for ARM. On AVX512 it provides an up to 8x improvement (over 3 GB/s per core) in comparison to AVX2. SHA Extensions give a performance boost of close to 4x over AVX2. ## Introduction @@ -8,7 +8,19 @@ This package uses Golang assembly. The AVX512 version is based on the Intel's "multi-buffer crypto library for IPSec" whereas the other Intel implementations are described in "Fast SHA-256 Implementations on Intel Architecture Processors" by J. Guilford et al. -## New: Support for AVX512 +## New: Support for Intel SHA Extensions + +Support for the Intel SHA Extensions has been added by Kristofer Peterson (@svenski123), originally developed for spacemeshos [here](https://github.com/spacemeshos/POET/issues/23). On CPUs that support it (known thus far Intel Celeron J3455 and AMD Ryzen) it gives a significant boost in performance (with thanks to @AudriusButkevicius for reporting the results; full results [here](https://github.com/minio/sha256-simd/pull/37#issuecomment-451607827)). + +``` +$ benchcmp avx2.txt sha-ext.txt +benchmark AVX2 MB/s SHA Ext MB/s speedup +BenchmarkHash5M 514.40 1975.17 3.84x +``` + +Thanks to Kristofer Peterson, we also added additional performance changes such as optimized padding, endian conversions which sped up all implementations i.e. Intel SHA alone while doubled performance for small sizes, the other changes increased everything roughly 50%. + +## Support for AVX512 We have added support for AVX512 which results in an up to 8x performance improvement over AVX2 (3.0 GHz Xeon Platinum 8124M CPU): @@ -24,7 +36,7 @@ Whereas the original Intel C implementation requires some sort of explicit scheduling of messages to be processed in parallel, for Golang it makes sense to take advantage of channels in order to group messages together and use channels as well for sending back the results (thereby effectively decoupling the calculations). We have implemented a fairly simple scheduling mechanism that seems to work well in practice. -Due to this differrent way of scheduling, we decided to use an explicit method to instantiate the AVX512 version. Essentially one or more AVX512 processing servers ([`Avx512Server`](https://github.com/minio/sha256-simd/blob/master/sha256blockAvx512_amd64.go#L294)) have to be created whereby each server can hash over 3 GB/s on a single core. An `hash.Hash` object ([`Avx512Digest`](https://github.com/minio/sha256-simd/blob/master/sha256blockAvx512_amd64.go#L45)) is then instantiated using one of these servers and used in the regular fashion: +Due to this different way of scheduling, we decided to use an explicit method to instantiate the AVX512 version. Essentially one or more AVX512 processing servers ([`Avx512Server`](https://github.com/minio/sha256-simd/blob/master/sha256blockAvx512_amd64.go#L294)) have to be created whereby each server can hash over 3 GB/s on a single core. An `hash.Hash` object ([`Avx512Digest`](https://github.com/minio/sha256-simd/blob/master/sha256blockAvx512_amd64.go#L45)) is then instantiated using one of these servers and used in the regular fashion: ```go import "github.com/minio/sha256-simd" @@ -66,6 +78,7 @@ | Processor | SIMD | Speed (MB/s) | | --------------------------------- | ------- | ------------:| | 3.0 GHz Intel Xeon Platinum 8124M | AVX512 | 3498 | +| 3.7 GHz AMD Ryzen 7 2700X | SHA Ext | 1979 | | 1.2 GHz ARM Cortex-A53 | ARM64 | 638 | | 3.0 GHz Intel Xeon Platinum 8124M | AVX2 | 449 | | 3.1 GHz Intel Core i7 | AVX | 362 | @@ -84,7 +97,7 @@ ## ARM SHA Extensions The 64-bit ARMv8 core has introduced new instructions for SHA1 and SHA2 acceleration as part of the [Cryptography Extensions](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0501f/CHDFJBCJ.html). Below you can see a small excerpt highlighting one of the rounds as is done for the SHA256 calculation process (for full code see [sha256block_arm64.s](https://github.com/minio/sha256-simd/blob/master/sha256block_arm64.s)). - + ``` sha256h q2, q3, v9.4s sha256h2 q3, q4, v9.4s @@ -100,7 +113,7 @@ ### Detailed benchmarks -Benchmarks generated on a 1.2 Ghz Quad-Core ARM Cortex A53 equipped [Pine64](https://www.pine64.com/). +Benchmarks generated on a 1.2 Ghz Quad-Core ARM Cortex A53 equipped [Pine64](https://www.pine64.com/). ``` minio@minio-arm:$ benchcmp golang.txt arm64.txt diff --git a/cpuid.go b/cpuid.go index bf9d6b0..878ad46 100644 --- a/cpuid.go +++ b/cpuid.go @@ -16,78 +16,104 @@ package sha256 // True when SIMD instructions are available. -var avx512 = haveAVX512() -var avx2 = haveAVX2() -var avx = haveAVX() -var ssse3 = haveSSSE3() +var avx512 bool +var avx2 bool +var avx bool +var sse bool +var sse2 bool +var sse3 bool +var ssse3 bool +var sse41 bool +var sse42 bool +var popcnt bool +var sha bool var armSha = haveArmSha() -// haveAVX returns true when there is AVX support -func haveAVX() bool { - _, _, c, _ := cpuid(1) +func init() { + var _xsave bool + var _osxsave bool + var _avx bool + var _avx2 bool + var _avx512f bool + var _avx512dq bool + // var _avx512pf bool + // var _avx512er bool + // var _avx512cd bool + var _avx512bw bool + var _avx512vl bool + var _sseState bool + var _avxState bool + var _opmaskState bool + var _zmmHI256State bool + var _hi16ZmmState bool - // Check XGETBV, OXSAVE and AVX bits - if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { - // Check for OS support - eax, _ := xgetbv(0) - return (eax & 0x6) == 0x6 - } - return false -} - -// haveAVX2 returns true when there is AVX2 support -func haveAVX2() bool { mfi, _, _, _ := cpuid(0) - // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. - if mfi >= 7 && haveAVX() { - _, ebx, _, _ := cpuidex(7, 0) - return (ebx & 0x00000020) != 0 + if mfi >= 1 { + _, _, c, d := cpuid(1) + + sse = (d & (1 << 25)) != 0 + sse2 = (d & (1 << 26)) != 0 + sse3 = (c & (1 << 0)) != 0 + ssse3 = (c & (1 << 9)) != 0 + sse41 = (c & (1 << 19)) != 0 + sse42 = (c & (1 << 20)) != 0 + popcnt = (c & (1 << 23)) != 0 + _xsave = (c & (1 << 26)) != 0 + _osxsave = (c & (1 << 27)) != 0 + _avx = (c & (1 << 28)) != 0 } - return false + + if mfi >= 7 { + _, b, _, _ := cpuid(7) + + _avx2 = (b & (1 << 5)) != 0 + _avx512f = (b & (1 << 16)) != 0 + _avx512dq = (b & (1 << 17)) != 0 + // _avx512pf = (b & (1 << 26)) != 0 + // _avx512er = (b & (1 << 27)) != 0 + // _avx512cd = (b & (1 << 28)) != 0 + _avx512bw = (b & (1 << 30)) != 0 + _avx512vl = (b & (1 << 31)) != 0 + sha = (b & (1 << 29)) != 0 + } + + // Stop here if XSAVE unsupported or not enabled + if !_xsave || !_osxsave { + return + } + + if _xsave && _osxsave { + a, _ := xgetbv(0) + + _sseState = (a & (1 << 1)) != 0 + _avxState = (a & (1 << 2)) != 0 + _opmaskState = (a & (1 << 5)) != 0 + _zmmHI256State = (a & (1 << 6)) != 0 + _hi16ZmmState = (a & (1 << 7)) != 0 + } else { + _sseState = true + } + + // Very unlikely that OS would enable XSAVE and then disable SSE + if !_sseState { + sse = false + sse2 = false + sse3 = false + ssse3 = false + sse41 = false + sse42 = false + } + + if _avxState { + avx = _avx + avx2 = _avx2 + } + + if _opmaskState && _zmmHI256State && _hi16ZmmState { + avx512 = (_avx512f && + _avx512dq && + _avx512bw && + _avx512vl) + } } - -// haveAVX512 returns true when there is AVX512 support -func haveAVX512() bool { - mfi, _, _, _ := cpuid(0) - - // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. - if mfi >= 7 { - _, _, c, _ := cpuid(1) - - // Only detect AVX-512 features if XGETBV is supported - if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { - // Check for OS support - eax, _ := xgetbv(0) - _, ebx, _, _ := cpuidex(7, 0) - - // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and - // ZMM16-ZMM31 state are enabled by OS) - /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). - if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { - if ebx&(1<<16) == 0 { - return false // no AVX512F - } - if ebx&(1<<17) == 0 { - return false // no AVX512DQ - } - if ebx&(1<<30) == 0 { - return false // no AVX512BW - } - if ebx&(1<<31) == 0 { - return false // no AVX512VL - } - return true - } - } - } - return false -} - -// haveSSSE3 returns true when there is SSSE3 support -func haveSSSE3() bool { - - _, _, c, _ := cpuid(1) - - return (c & 0x00000200) != 0 -} diff --git a/cpuid_386.s b/cpuid_386.s index f908ae8..1511cd6 100644 --- a/cpuid_386.s +++ b/cpuid_386.s @@ -24,30 +24,30 @@ // func cpuid(op uint32) (eax, ebx, ecx, edx uint32) TEXT ·cpuid(SB), 7, $0 - XORL CX, CX - MOVL op+0(FP), AX - CPUID - MOVL AX, eax+4(FP) - MOVL BX, ebx+8(FP) - MOVL CX, ecx+12(FP) - MOVL DX, edx+16(FP) - RET + XORL CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+4(FP) + MOVL BX, ebx+8(FP) + MOVL CX, ecx+12(FP) + MOVL DX, edx+16(FP) + RET // func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) TEXT ·cpuidex(SB), 7, $0 - MOVL op+0(FP), AX - MOVL op2+4(FP), CX - CPUID - MOVL AX, eax+8(FP) - MOVL BX, ebx+12(FP) - MOVL CX, ecx+16(FP) - MOVL DX, edx+20(FP) - RET + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET // func xgetbv(index uint32) (eax, edx uint32) TEXT ·xgetbv(SB), 7, $0 - MOVL index+0(FP), CX - BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV - MOVL AX, eax+4(FP) - MOVL DX, edx+8(FP) - RET + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+4(FP) + MOVL DX, edx+8(FP) + RET diff --git a/cpuid_amd64.s b/cpuid_amd64.s index 9a8a032..b0f4147 100644 --- a/cpuid_amd64.s +++ b/cpuid_amd64.s @@ -24,31 +24,30 @@ // func cpuid(op uint32) (eax, ebx, ecx, edx uint32) TEXT ·cpuid(SB), 7, $0 - XORQ CX, CX - MOVL op+0(FP), AX - CPUID - MOVL AX, eax+8(FP) - MOVL BX, ebx+12(FP) - MOVL CX, ecx+16(FP) - MOVL DX, edx+20(FP) - RET - + XORQ CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET // func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) TEXT ·cpuidex(SB), 7, $0 - MOVL op+0(FP), AX - MOVL op2+4(FP), CX - CPUID - MOVL AX, eax+8(FP) - MOVL BX, ebx+12(FP) - MOVL CX, ecx+16(FP) - MOVL DX, edx+20(FP) - RET + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET // func xgetbv(index uint32) (eax, edx uint32) TEXT ·xgetbv(SB), 7, $0 - MOVL index+0(FP), CX - BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV - MOVL AX, eax+8(FP) - MOVL DX, edx+12(FP) - RET + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+8(FP) + MOVL DX, edx+12(FP) + RET diff --git a/cpuid_other.go b/cpuid_other.go index e26952a..3e44158 100644 --- a/cpuid_other.go +++ b/cpuid_other.go @@ -13,7 +13,7 @@ // limitations under the License. // -// +build ppc64 ppc64le mips mipsle mips64 mips64le s390x +// +build !386,!amd64,!arm,!arm64 arm64,!linux package sha256 diff --git a/cpuid_others_arm64.go b/cpuid_others_arm64.go deleted file mode 100644 index 0fb4022..0000000 --- a/cpuid_others_arm64.go +++ /dev/null @@ -1,35 +0,0 @@ -// +build arm64,!linux - -// Minio Cloud Storage, (C) 2016 Minio, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -package sha256 - -func cpuid(op uint32) (eax, ebx, ecx, edx uint32) { - return 0, 0, 0, 0 -} - -func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) { - return 0, 0, 0, 0 -} - -func xgetbv(index uint32) (eax, edx uint32) { - return 0, 0 -} - -// Check for sha2 instruction flag. -func haveArmSha() bool { - return false -} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..4451e9e --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/minio/sha256-simd + +go 1.12 diff --git a/sha256.go b/sha256.go index f28236e..4e1f6d2 100644 --- a/sha256.go +++ b/sha256.go @@ -18,6 +18,7 @@ import ( "crypto/sha256" + "encoding/binary" "hash" "runtime" ) @@ -29,7 +30,7 @@ const BlockSize = 64 const ( - chunk = 64 + chunk = BlockSize init0 = 0x6A09E667 init1 = 0xBB67AE85 init2 = 0x3C6EF372 @@ -62,29 +63,44 @@ d.len = 0 } -func block(dig *digest, p []byte) { +type blockfuncType int + +const ( + blockfuncGeneric blockfuncType = iota + blockfuncAvx512 blockfuncType = iota + blockfuncAvx2 blockfuncType = iota + blockfuncAvx blockfuncType = iota + blockfuncSsse blockfuncType = iota + blockfuncSha blockfuncType = iota + blockfuncArm blockfuncType = iota +) + +var blockfunc blockfuncType + +func init() { is386bit := runtime.GOARCH == "386" isARM := runtime.GOARCH == "arm" - if is386bit || isARM { - blockGeneric(dig, p) - } - switch !is386bit && !isARM { + switch { + case is386bit || isARM: + blockfunc = blockfuncGeneric + case sha && ssse3 && sse41: + blockfunc = blockfuncSha case avx2: - blockAvx2Go(dig, p) + blockfunc = blockfuncAvx2 case avx: - blockAvxGo(dig, p) + blockfunc = blockfuncAvx case ssse3: - blockSsseGo(dig, p) + blockfunc = blockfuncSsse case armSha: - blockArmGo(dig, p) + blockfunc = blockfuncArm default: - blockGeneric(dig, p) + blockfunc = blockfuncGeneric } } // New returns a new hash.Hash computing the SHA256 checksum. func New() hash.Hash { - if avx2 || avx || ssse3 || armSha { + if blockfunc != blockfuncGeneric { d := new(digest) d.Reset() return d @@ -95,11 +111,12 @@ } // Sum256 - single caller sha256 helper -func Sum256(data []byte) [Size]byte { +func Sum256(data []byte) (result [Size]byte) { var d digest d.Reset() d.Write(data) - return d.checkSum() + result = d.checkSum() + return } // Return size of checksum @@ -141,37 +158,252 @@ } // Intermediate checksum function -func (d *digest) checkSum() [Size]byte { - len := d.len - // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. - var tmp [64]byte - tmp[0] = 0x80 - if len%64 < 56 { - d.Write(tmp[0 : 56-len%64]) - } else { - d.Write(tmp[0 : 64+56-len%64]) - } - - // Length in bits. - len <<= 3 - for i := uint(0); i < 8; i++ { - tmp[i] = byte(len >> (56 - 8*i)) - } - d.Write(tmp[0:8]) - - if d.nx != 0 { - panic("d.nx != 0") - } - - h := d.h[:] - - var digest [Size]byte - for i, s := range h { - digest[i*4] = byte(s >> 24) - digest[i*4+1] = byte(s >> 16) - digest[i*4+2] = byte(s >> 8) - digest[i*4+3] = byte(s) - } - - return digest -} +func (d *digest) checkSum() (digest [Size]byte) { + n := d.nx + + var k [64]byte + copy(k[:], d.x[:n]) + + k[n] = 0x80 + + if n >= 56 { + block(d, k[:]) + + // clear block buffer - go compiles this to optimal 1x xorps + 4x movups + // unfortunately expressing this more succinctly results in much worse code + k[0] = 0 + k[1] = 0 + k[2] = 0 + k[3] = 0 + k[4] = 0 + k[5] = 0 + k[6] = 0 + k[7] = 0 + k[8] = 0 + k[9] = 0 + k[10] = 0 + k[11] = 0 + k[12] = 0 + k[13] = 0 + k[14] = 0 + k[15] = 0 + k[16] = 0 + k[17] = 0 + k[18] = 0 + k[19] = 0 + k[20] = 0 + k[21] = 0 + k[22] = 0 + k[23] = 0 + k[24] = 0 + k[25] = 0 + k[26] = 0 + k[27] = 0 + k[28] = 0 + k[29] = 0 + k[30] = 0 + k[31] = 0 + k[32] = 0 + k[33] = 0 + k[34] = 0 + k[35] = 0 + k[36] = 0 + k[37] = 0 + k[38] = 0 + k[39] = 0 + k[40] = 0 + k[41] = 0 + k[42] = 0 + k[43] = 0 + k[44] = 0 + k[45] = 0 + k[46] = 0 + k[47] = 0 + k[48] = 0 + k[49] = 0 + k[50] = 0 + k[51] = 0 + k[52] = 0 + k[53] = 0 + k[54] = 0 + k[55] = 0 + k[56] = 0 + k[57] = 0 + k[58] = 0 + k[59] = 0 + k[60] = 0 + k[61] = 0 + k[62] = 0 + k[63] = 0 + } + binary.BigEndian.PutUint64(k[56:64], uint64(d.len)<<3) + block(d, k[:]) + + { + const i = 0 + binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) + } + { + const i = 1 + binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) + } + { + const i = 2 + binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) + } + { + const i = 3 + binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) + } + { + const i = 4 + binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) + } + { + const i = 5 + binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) + } + { + const i = 6 + binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) + } + { + const i = 7 + binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i]) + } + + return +} + +func block(dig *digest, p []byte) { + if blockfunc == blockfuncSha { + blockShaGo(dig, p) + } else if blockfunc == blockfuncAvx2 { + blockAvx2Go(dig, p) + } else if blockfunc == blockfuncAvx { + blockAvxGo(dig, p) + } else if blockfunc == blockfuncSsse { + blockSsseGo(dig, p) + } else if blockfunc == blockfuncArm { + blockArmGo(dig, p) + } else if blockfunc == blockfuncGeneric { + blockGeneric(dig, p) + } +} + +func blockGeneric(dig *digest, p []byte) { + var w [64]uint32 + h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] + for len(p) >= chunk { + // Can interlace the computation of w with the + // rounds below if needed for speed. + for i := 0; i < 16; i++ { + j := i * 4 + w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) + } + for i := 16; i < 64; i++ { + v1 := w[i-2] + t1 := (v1>>17 | v1<<(32-17)) ^ (v1>>19 | v1<<(32-19)) ^ (v1 >> 10) + v2 := w[i-15] + t2 := (v2>>7 | v2<<(32-7)) ^ (v2>>18 | v2<<(32-18)) ^ (v2 >> 3) + w[i] = t1 + w[i-7] + t2 + w[i-16] + } + + a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7 + + for i := 0; i < 64; i++ { + t1 := h + ((e>>6 | e<<(32-6)) ^ (e>>11 | e<<(32-11)) ^ (e>>25 | e<<(32-25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i] + + t2 := ((a>>2 | a<<(32-2)) ^ (a>>13 | a<<(32-13)) ^ (a>>22 | a<<(32-22))) + ((a & b) ^ (a & c) ^ (b & c)) + + h = g + g = f + f = e + e = d + t1 + d = c + c = b + b = a + a = t1 + t2 + } + + h0 += a + h1 += b + h2 += c + h3 += d + h4 += e + h5 += f + h6 += g + h7 += h + + p = p[chunk:] + } + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 +} + +var _K = []uint32{ + 0x428a2f98, + 0x71374491, + 0xb5c0fbcf, + 0xe9b5dba5, + 0x3956c25b, + 0x59f111f1, + 0x923f82a4, + 0xab1c5ed5, + 0xd807aa98, + 0x12835b01, + 0x243185be, + 0x550c7dc3, + 0x72be5d74, + 0x80deb1fe, + 0x9bdc06a7, + 0xc19bf174, + 0xe49b69c1, + 0xefbe4786, + 0x0fc19dc6, + 0x240ca1cc, + 0x2de92c6f, + 0x4a7484aa, + 0x5cb0a9dc, + 0x76f988da, + 0x983e5152, + 0xa831c66d, + 0xb00327c8, + 0xbf597fc7, + 0xc6e00bf3, + 0xd5a79147, + 0x06ca6351, + 0x14292967, + 0x27b70a85, + 0x2e1b2138, + 0x4d2c6dfc, + 0x53380d13, + 0x650a7354, + 0x766a0abb, + 0x81c2c92e, + 0x92722c85, + 0xa2bfe8a1, + 0xa81a664b, + 0xc24b8b70, + 0xc76c51a3, + 0xd192e819, + 0xd6990624, + 0xf40e3585, + 0x106aa070, + 0x19a4c116, + 0x1e376c08, + 0x2748774c, + 0x34b0bcb5, + 0x391c0cb3, + 0x4ed8aa4a, + 0x5b9cca4f, + 0x682e6ff3, + 0x748f82ee, + 0x78a5636f, + 0x84c87814, + 0x8cc70208, + 0x90befffa, + 0xa4506ceb, + 0xbef9a3f7, + 0xc67178f2, +} diff --git a/sha256_test.go b/sha256_test.go index 089e815..89499e3 100644 --- a/sha256_test.go +++ b/sha256_test.go @@ -52,6 +52,7 @@ import ( "encoding/hex" "fmt" + "runtime" "strings" "testing" ) @@ -2208,25 +2209,56 @@ } func TestGolden(t *testing.T) { + blockfuncSaved := blockfunc + + defer func() { + blockfunc = blockfuncSaved + }() + + if true { + blockfunc = blockfuncGeneric + for _, g := range golden { + s := fmt.Sprintf("%x", Sum256([]byte(g.in))) + if Sum256([]byte(g.in)) != g.out { + t.Fatalf("Generic: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) + } + } + } + + if runtime.GOARCH == "386" || runtime.GOARCH == "arm" { + // doesn't support anything but the generic version. + return + } + + if sha && ssse3 && sse41 { + blockfunc = blockfuncSha + for _, g := range golden { + s := fmt.Sprintf("%x", Sum256([]byte(g.in))) + if Sum256([]byte(g.in)) != g.out { + t.Fatalf("SHA: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) + } + } + } if avx2 { + blockfunc = blockfuncAvx2 for _, g := range golden { s := fmt.Sprintf("%x", Sum256([]byte(g.in))) if Sum256([]byte(g.in)) != g.out { t.Fatalf("AVX2: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) } } - avx2 = false } if avx { + blockfunc = blockfuncAvx for _, g := range golden { s := fmt.Sprintf("%x", Sum256([]byte(g.in))) if Sum256([]byte(g.in)) != g.out { t.Fatalf("AVX: Sum256 function: sha256(%s) = %s want %s", g.in, s, hex.EncodeToString(g.out[:])) } } - avx = false } if ssse3 { + blockfunc = blockfuncSsse for _, g := range golden { s := fmt.Sprintf("%x", Sum256([]byte(g.in))) if Sum256([]byte(g.in)) != g.out { @@ -2255,6 +2287,7 @@ var buf = make([]byte, size) b.SetBytes(int64(size)) sum := make([]byte, bench.Size()) + b.ResetTimer() for i := 0; i < b.N; i++ { bench.Reset() bench.Write(buf[:size]) @@ -2262,9 +2295,41 @@ } } -func BenchmarkHash8Bytes(b *testing.B) { benchmarkSize(b, 8) } -func BenchmarkHash1K(b *testing.B) { benchmarkSize(b, 1024) } -func BenchmarkHash8K(b *testing.B) { benchmarkSize(b, 8192) } -func BenchmarkHash1MAvx2(b *testing.B) { benchmarkSize(b, 1024*1024) } -func BenchmarkHash5MAvx2(b *testing.B) { benchmarkSize(b, 5*1024*1024) } -func BenchmarkHash10MAvx2(b *testing.B) { benchmarkSize(b, 10*1024*1024) } +func BenchmarkHash(b *testing.B) { + algos := []struct { + n string + t blockfuncType + f bool + }{ + {"SHA_", blockfuncSha, sha && sse41 && ssse3}, + {"AVX2", blockfuncAvx2, avx2}, + {"AVX_", blockfuncAvx, avx}, + {"SSSE", blockfuncSsse, ssse3}, + {"GEN_", blockfuncGeneric, true}, + } + + sizes := []struct { + n string + f func(*testing.B, int) + s int + }{ + {"8Bytes", benchmarkSize, 1 << 3}, + {"1K", benchmarkSize, 1 << 10}, + {"8K", benchmarkSize, 1 << 13}, + {"1M", benchmarkSize, 1 << 20}, + {"5M", benchmarkSize, 5 << 20}, + {"10M", benchmarkSize, 5 << 21}, + } + + for _, a := range algos { + if a.f { + blockfuncSaved := blockfunc + blockfunc = a.t + for _, y := range sizes { + s := a.n + "/" + y.n + b.Run(s, func(b *testing.B) { y.f(b, y.s) }) + } + blockfunc = blockfuncSaved + } + } +} diff --git a/sha256blockAvx2_amd64.go b/sha256blockAvx2_amd64.go index 43ee7a9..52fcaee 100644 --- a/sha256blockAvx2_amd64.go +++ b/sha256blockAvx2_amd64.go @@ -1,4 +1,4 @@ -//+build !noasm +//+build !noasm,!appengine /* * Minio Cloud Storage, (C) 2016 Minio, Inc. diff --git a/sha256blockAvx2_amd64.s b/sha256blockAvx2_amd64.s index ff5d2a6..80b0b73 100644 --- a/sha256blockAvx2_amd64.s +++ b/sha256blockAvx2_amd64.s @@ -1,4 +1,4 @@ -//+build !noasm !appengine +//+build !noasm,!appengine // SHA256 implementation for AVX2 @@ -31,8 +31,6 @@ // github.com/minio/asm2plan9s to assemble Intel instructions to their Plan9 // equivalents // - -#include "textflag.h" DATA K256<>+0x000(SB)/8, $0x71374491428a2f98 DATA K256<>+0x008(SB)/8, $0xe9b5dba5b5c0fbcf @@ -114,1329 +112,1338 @@ GLOBL K256<>(SB), 8, $608 +// We need 0x220 stack space aligned on a 512 boundary, so for the +// worstcase-aligned SP we need twice this amount, being 1088 (=0x440) +// +// SP aligned end-aligned stacksize +// 100013d0 10001400 10001620 592 +// 100013d8 10001400 10001620 584 +// 100013e0 10001600 10001820 1088 +// 100013e8 10001600 10001820 1080 + // func blockAvx2(h []uint32, message []uint8) -TEXT ·blockAvx2(SB), 7, $0 - - MOVQ ctx+0(FP), DI // DI: &h - MOVQ inp+24(FP), SI // SI: &message - MOVQ inplength+32(FP), DX // len(message) - ADDQ SI, DX // end pointer of input - MOVQ SP, R11 // copy stack pointer - SUBQ $0x220, SP // sp -= 0x220 - ANDQ $0xfffffffffffffc00, SP // align stack frame - ADDQ $0x1c0, SP - MOVQ DI, 0x40(SP) // save ctx - MOVQ SI, 0x48(SP) // save input - MOVQ DX, 0x50(SP) // save end pointer - MOVQ R11, 0x58(SP) // save copy of stack pointer - - WORD $0xf8c5; BYTE $0x77 // vzeroupper - ADDQ $0x40, SI // input++ - MOVL (DI), AX - MOVQ SI, R12 // borrow $T1 - MOVL 4(DI), BX - CMPQ SI, DX // $_end - MOVL 8(DI), CX - LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */ - MOVL 12(DI), DX - MOVL 16(DI), R8 - MOVL 20(DI), R9 - MOVL 24(DI), R10 - MOVL 28(DI), R11 - - LEAQ K256<>(SB), BP - LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rip+0x220] */ - LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rip+0x240] */ - LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm7,YMMWORD PTR [rip+0x200] */ +TEXT ·blockAvx2(SB),$1088-48 + + MOVQ h+0(FP), DI // DI: &h + MOVQ message_base+24(FP), SI // SI: &message + MOVQ message_len+32(FP), DX // len(message) + ADDQ SI, DX // end pointer of input + MOVQ SP, R11 // copy stack pointer + ADDQ $0x220, SP // sp += 0x220 + ANDQ $0xfffffffffffffe00, SP // align stack frame + ADDQ $0x1c0, SP + MOVQ DI, 0x40(SP) // save ctx + MOVQ SI, 0x48(SP) // save input + MOVQ DX, 0x50(SP) // save end pointer + MOVQ R11, 0x58(SP) // save copy of stack pointer + + WORD $0xf8c5; BYTE $0x77 // vzeroupper + ADDQ $0x40, SI // input++ + MOVL (DI), AX + MOVQ SI, R12 // borrow $T1 + MOVL 4(DI), BX + CMPQ SI, DX // $_end + MOVL 8(DI), CX + LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */ + MOVL 12(DI), DX + MOVL 16(DI), R8 + MOVL 20(DI), R9 + MOVL 24(DI), R10 + MOVL 28(DI), R11 + + LEAQ K256<>(SB), BP + LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rip+0x220] */ + LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rip+0x240] */ + LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm7,YMMWORD PTR [rip+0x200] */ loop0: - LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10 - - // Load first 16 dwords from two blocks - MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40] - MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30] - MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20] - MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10] - - // Byte swap data and transpose data into high/low - LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1 - LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1 - LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7 - LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1 - LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7 - LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1 - - LEAQ K256<>(SB), BP - LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7 - LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp] - LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7 - LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp] - LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp] - LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp] - - LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4 - XORQ R14, R14 - LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5 - - ADDQ $-0x40, SP - MOVQ BX, DI - LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 - XORQ CX, DI // magic - LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7 - MOVQ R9, R12 - ADDQ $0x80,BP + LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10 + + // Load first 16 dwords from two blocks + MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40] + MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30] + MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20] + MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10] + + // Byte swap data and transpose data into high/low + LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1 + LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1 + LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7 + LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1 + LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7 + LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1 + + LEAQ K256<>(SB), BP + LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7 + LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp] + LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7 + LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp] + LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp] + LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp] + + LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4 + XORQ R14, R14 + LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5 + + ADDQ $-0x40, SP + MOVQ BX, DI + LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 + XORQ CX, DI // magic + LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7 + MOVQ R9, R12 + ADDQ $0x80, BP loop1: - // Schedule 48 input dwords, by doing 3 rounds of 12 each - // Note: SIMD instructions are interleaved with the SHA calculations - ADDQ $-0x40, SP - LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4 - - // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) - LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] - WORD $0x2145; BYTE $0xc4 // and r12d,r8d - LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 - LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4 - LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb - LONG $0x30048d42 // lea eax,[rax+r14*1] - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 - LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 - LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7 - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xc7 // mov r15d,eax - LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 - LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 - LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] - WORD $0x3141; BYTE $0xdf // xor r15d,ebx - LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe - LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd - LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 - LONG $0x1a148d42 // lea edx,[rdx+r11*1] - LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xdf31 // xor edi,ebx - LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] - WORD $0x8945; BYTE $0xc4 // mov r12d,r8d - LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb - - // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) - LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] - WORD $0x2141; BYTE $0xd4 // and r12d,edx - LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 - LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 - LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb - LONG $0x331c8d47 // lea r11d,[r11+r14*1] - LONG $0x22148d47 // lea r10d,[r10+r12*1] - LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb - LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 - LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 - LONG $0x22148d47 // lea r10d,[r10+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xdf // mov edi,r11d - LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa - LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 - LONG $0x2a148d47 // lea r10d,[r10+r13*1] - WORD $0xc731 // xor edi,eax - LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 - LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd - LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 - LONG $0x110c8d42 // lea ecx,[rcx+r10*1] - LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xc7 // xor r15d,eax - LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4 - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3a148d47 // lea r10d,[r10+r15*1] - WORD $0x8941; BYTE $0xd4 // mov r12d,edx - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - - // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) - LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] - WORD $0x2141; BYTE $0xcc // and r12d,ecx - LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 - LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 - LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb - LONG $0x32148d47 // lea r10d,[r10+r14*1] - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 - LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xd7 // mov r15d,r10d - LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 - LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 - LONG $0x290c8d47 // lea r9d,[r9+r13*1] - WORD $0x3145; BYTE $0xdf // xor r15d,r11d - LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50 - LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd - LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 - LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] - LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xdf // xor edi,r11d - LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d45 // lea r9d,[r9+rdi*1] - WORD $0x8941; BYTE $0xcc // mov r12d,ecx - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - - // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) - LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] - WORD $0x2141; BYTE $0xdc // and r12d,ebx - LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 - LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 - LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb - LONG $0x310c8d47 // lea r9d,[r9+r14*1] - LONG $0x20048d47 // lea r8d,[r8+r12*1] - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 - LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 - LONG $0x20048d47 // lea r8d,[r8+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xcf // mov edi,r9d - LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 - LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 - LONG $0x28048d47 // lea r8d,[r8+r13*1] - WORD $0x3144; BYTE $0xd7 // xor edi,r10d - LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0] - LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd - LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 - LONG $0x00048d42 // lea eax,[rax+r8*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xd7 // xor r15d,r10d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d47 // lea r8d,[r8+r15*1] - WORD $0x8941; BYTE $0xdc // mov r12d,ebx - - LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 - LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4 - - // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) - LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] - WORD $0x2141; BYTE $0xc4 // and r12d,eax - LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 - LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4 - LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb - LONG $0x30048d47 // lea r8d,[r8+r14*1] - LONG $0x22148d42 // lea edx,[rdx+r12*1] - LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 - LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 - LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7 - LONG $0x22148d42 // lea edx,[rdx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xc7 // mov r15d,r8d - LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 - LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 - LONG $0x2a148d42 // lea edx,[rdx+r13*1] - WORD $0x3145; BYTE $0xcf // xor r15d,r9d - LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe - LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd - LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 - LONG $0x131c8d45 // lea r11d,[r11+rdx*1] - LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xcf // xor edi,r9d - LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] - WORD $0x8941; BYTE $0xc4 // mov r12d,eax - LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb - - // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) - LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] - WORD $0x2145; BYTE $0xdc // and r12d,r11d - LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 - LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 - LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb - LONG $0x32148d42 // lea edx,[rdx+r14*1] - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb - LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 - LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xd789 // mov edi,edx - LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa - LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 - LONG $0x290c8d42 // lea ecx,[rcx+r13*1] - WORD $0x3144; BYTE $0xc7 // xor edi,r8d - LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 - LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd - LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 - LONG $0x0a148d45 // lea r10d,[r10+rcx*1] - LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xc7 // xor r15d,r8d - LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4 - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d42 // lea ecx,[rcx+r15*1] - WORD $0x8945; BYTE $0xdc // mov r12d,r11d - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - - // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) - LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] - WORD $0x2145; BYTE $0xd4 // and r12d,r10d - LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 - LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 - LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb - LONG $0x310c8d42 // lea ecx,[rcx+r14*1] - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 - LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xcf // mov r15d,ecx - LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 - LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 - LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] - WORD $0x3141; BYTE $0xd7 // xor r15d,edx - LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50 - LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd - LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 - LONG $0x190c8d45 // lea r9d,[r9+rbx*1] - LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xd731 // xor edi,edx - LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] - WORD $0x8945; BYTE $0xd4 // mov r12d,r10d - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - - // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) - LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] - WORD $0x2145; BYTE $0xcc // and r12d,r9d - LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 - LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 - LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb - LONG $0x331c8d42 // lea ebx,[rbx+r14*1] - LONG $0x20048d42 // lea eax,[rax+r12*1] - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 - LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 - LONG $0x20048d42 // lea eax,[rax+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xdf89 // mov edi,ebx - LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 - LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 - LONG $0x28048d42 // lea eax,[rax+r13*1] - WORD $0xcf31 // xor edi,ecx - LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20] - LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd - LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 - LONG $0x00048d45 // lea r8d,[r8+rax*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xcf // xor r15d,ecx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d42 // lea eax,[rax+r15*1] - WORD $0x8945; BYTE $0xcc // mov r12d,r9d - - LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 - - LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40] - LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4 - - // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) - LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] - WORD $0x2145; BYTE $0xc4 // and r12d,r8d - LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 - LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4 - LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb - LONG $0x30048d42 // lea eax,[rax+r14*1] - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 - LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 - LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7 - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xc7 // mov r15d,eax - LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 - LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 - LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] - WORD $0x3141; BYTE $0xdf // xor r15d,ebx - LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe - LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd - LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 - LONG $0x1a148d42 // lea edx,[rdx+r11*1] - LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xdf31 // xor edi,ebx - LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] - WORD $0x8945; BYTE $0xc4 // mov r12d,r8d - LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb - - // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) - LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] - WORD $0x2141; BYTE $0xd4 // and r12d,edx - LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 - LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 - LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb - LONG $0x331c8d47 // lea r11d,[r11+r14*1] - LONG $0x22148d47 // lea r10d,[r10+r12*1] - LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb - LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 - LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 - LONG $0x22148d47 // lea r10d,[r10+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xdf // mov edi,r11d - LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa - LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 - LONG $0x2a148d47 // lea r10d,[r10+r13*1] - WORD $0xc731 // xor edi,eax - LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 - LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd - LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 - LONG $0x110c8d42 // lea ecx,[rcx+r10*1] - LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xc7 // xor r15d,eax - LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4 - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3a148d47 // lea r10d,[r10+r15*1] - WORD $0x8941; BYTE $0xd4 // mov r12d,edx - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - - // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) - LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] - WORD $0x2141; BYTE $0xcc // and r12d,ecx - LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 - LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 - LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb - LONG $0x32148d47 // lea r10d,[r10+r14*1] - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 - LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xd7 // mov r15d,r10d - LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 - LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 - LONG $0x290c8d47 // lea r9d,[r9+r13*1] - WORD $0x3145; BYTE $0xdf // xor r15d,r11d - LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50 - LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd - LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 - LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] - LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xdf // xor edi,r11d - LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d45 // lea r9d,[r9+rdi*1] - WORD $0x8941; BYTE $0xcc // mov r12d,ecx - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - - // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) - LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] - WORD $0x2141; BYTE $0xdc // and r12d,ebx - LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 - LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 - LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb - LONG $0x310c8d47 // lea r9d,[r9+r14*1] - LONG $0x20048d47 // lea r8d,[r8+r12*1] - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 - LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 - LONG $0x20048d47 // lea r8d,[r8+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xcf // mov edi,r9d - LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 - LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 - LONG $0x28048d47 // lea r8d,[r8+r13*1] - WORD $0x3144; BYTE $0xd7 // xor edi,r10d - LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40] - LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd - LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 - LONG $0x00048d42 // lea eax,[rax+r8*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xd7 // xor r15d,r10d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d47 // lea r8d,[r8+r15*1] - WORD $0x8941; BYTE $0xdc // mov r12d,ebx - - LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 - LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4 - - // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) - LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] - WORD $0x2141; BYTE $0xc4 // and r12d,eax - LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 - LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4 - LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb - LONG $0x30048d47 // lea r8d,[r8+r14*1] - LONG $0x22148d42 // lea edx,[rdx+r12*1] - LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 - LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 - LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7 - LONG $0x22148d42 // lea edx,[rdx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xc7 // mov r15d,r8d - LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 - LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 - LONG $0x2a148d42 // lea edx,[rdx+r13*1] - WORD $0x3145; BYTE $0xcf // xor r15d,r9d - LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe - LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd - LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 - LONG $0x131c8d45 // lea r11d,[r11+rdx*1] - LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xcf // xor edi,r9d - LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] - WORD $0x8941; BYTE $0xc4 // mov r12d,eax - LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb - - // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) - LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] - WORD $0x2145; BYTE $0xdc // and r12d,r11d - LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 - LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 - LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb - LONG $0x32148d42 // lea edx,[rdx+r14*1] - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb - LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 - LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xd789 // mov edi,edx - LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa - LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 - LONG $0x290c8d42 // lea ecx,[rcx+r13*1] - WORD $0x3144; BYTE $0xc7 // xor edi,r8d - LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 - LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd - LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 - LONG $0x0a148d45 // lea r10d,[r10+rcx*1] - LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xc7 // xor r15d,r8d - LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4 - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d42 // lea ecx,[rcx+r15*1] - WORD $0x8945; BYTE $0xdc // mov r12d,r11d - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - - // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) - LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] - WORD $0x2145; BYTE $0xd4 // and r12d,r10d - LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 - LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 - LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb - LONG $0x310c8d42 // lea ecx,[rcx+r14*1] - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 - LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xcf // mov r15d,ecx - LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 - LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 - LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] - WORD $0x3141; BYTE $0xd7 // xor r15d,edx - LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50 - LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd - LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 - LONG $0x190c8d45 // lea r9d,[r9+rbx*1] - LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xd731 // xor edi,edx - LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] - WORD $0x8945; BYTE $0xd4 // mov r12d,r10d - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - - // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) - LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] - WORD $0x2145; BYTE $0xcc // and r12d,r9d - LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 - LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 - LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb - LONG $0x331c8d42 // lea ebx,[rbx+r14*1] - LONG $0x20048d42 // lea eax,[rax+r12*1] - LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 - LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 - LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 - LONG $0x20048d42 // lea eax,[rax+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xdf89 // mov edi,ebx - LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 - LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 - LONG $0x28048d42 // lea eax,[rax+r13*1] - WORD $0xcf31 // xor edi,ecx - LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60] - LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd - LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 - LONG $0x00048d45 // lea r8d,[r8+rax*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xcf // xor r15d,ecx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d42 // lea eax,[rax+r15*1] - WORD $0x8945; BYTE $0xcc // mov r12d,r9d - - LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 - ADDQ $0x80, BP - - CMPB 0x3(BP),$0x0 - JNE loop1 - - // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40) - LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40] - WORD $0x2145; BYTE $0xc4 // and r12d,r8d - LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 - LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb - LONG $0x30048d42 // lea eax,[rax+r14*1] - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xc7 // mov r15d,eax - LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 - LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] - WORD $0x3141; BYTE $0xdf // xor r15d,ebx - LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd - LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 - LONG $0x1a148d42 // lea edx,[rdx+r11*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xdf31 // xor edi,ebx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] - WORD $0x8945; BYTE $0xc4 // mov r12d,r8d - - // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44) - LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44] - WORD $0x2141; BYTE $0xd4 // and r12d,edx - LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 - LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb - LONG $0x331c8d47 // lea r11d,[r11+r14*1] - LONG $0x22148d47 // lea r10d,[r10+r12*1] - LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 - LONG $0x22148d47 // lea r10d,[r10+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xdf // mov edi,r11d - LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 - LONG $0x2a148d47 // lea r10d,[r10+r13*1] - WORD $0xc731 // xor edi,eax - LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd - LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 - LONG $0x110c8d42 // lea ecx,[rcx+r10*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xc7 // xor r15d,eax - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3a148d47 // lea r10d,[r10+r15*1] - WORD $0x8941; BYTE $0xd4 // mov r12d,edx - - // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48) - LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48] - WORD $0x2141; BYTE $0xcc // and r12d,ecx - LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 - LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb - LONG $0x32148d47 // lea r10d,[r10+r14*1] - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xd7 // mov r15d,r10d - LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 - LONG $0x290c8d47 // lea r9d,[r9+r13*1] - WORD $0x3145; BYTE $0xdf // xor r15d,r11d - LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd - LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 - LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xdf // xor edi,r11d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d45 // lea r9d,[r9+rdi*1] - WORD $0x8941; BYTE $0xcc // mov r12d,ecx - - // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c) - LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c] - WORD $0x2141; BYTE $0xdc // and r12d,ebx - LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 - LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb - LONG $0x310c8d47 // lea r9d,[r9+r14*1] - LONG $0x20048d47 // lea r8d,[r8+r12*1] - LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 - LONG $0x20048d47 // lea r8d,[r8+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xcf // mov edi,r9d - LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 - LONG $0x28048d47 // lea r8d,[r8+r13*1] - WORD $0x3144; BYTE $0xd7 // xor edi,r10d - LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd - LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 - LONG $0x00048d42 // lea eax,[rax+r8*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xd7 // xor r15d,r10d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d47 // lea r8d,[r8+r15*1] - WORD $0x8941; BYTE $0xdc // mov r12d,ebx - - // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60) - LONG $0x60245403 // add edx,[rsp+0x60] - WORD $0x2141; BYTE $0xc4 // and r12d,eax - LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 - LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb - LONG $0x30048d47 // lea r8d,[r8+r14*1] - LONG $0x22148d42 // lea edx,[rdx+r12*1] - LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 - LONG $0x22148d42 // lea edx,[rdx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xc7 // mov r15d,r8d - LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 - LONG $0x2a148d42 // lea edx,[rdx+r13*1] - WORD $0x3145; BYTE $0xcf // xor r15d,r9d - LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd - LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 - LONG $0x131c8d45 // lea r11d,[r11+rdx*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xcf // xor edi,r9d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] - WORD $0x8941; BYTE $0xc4 // mov r12d,eax - - // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64) - LONG $0x64244c03 // add ecx,[rsp+0x64] - WORD $0x2145; BYTE $0xdc // and r12d,r11d - LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 - LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb - LONG $0x32148d42 // lea edx,[rdx+r14*1] - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xd789 // mov edi,edx - LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 - LONG $0x290c8d42 // lea ecx,[rcx+r13*1] - WORD $0x3144; BYTE $0xc7 // xor edi,r8d - LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd - LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 - LONG $0x0a148d45 // lea r10d,[r10+rcx*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xc7 // xor r15d,r8d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d42 // lea ecx,[rcx+r15*1] - WORD $0x8945; BYTE $0xdc // mov r12d,r11d - - // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68) - LONG $0x68245c03 // add ebx,[rsp+0x68] - WORD $0x2145; BYTE $0xd4 // and r12d,r10d - LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 - LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb - LONG $0x310c8d42 // lea ecx,[rcx+r14*1] - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xcf // mov r15d,ecx - LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 - LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] - WORD $0x3141; BYTE $0xd7 // xor r15d,edx - LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd - LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 - LONG $0x190c8d45 // lea r9d,[r9+rbx*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xd731 // xor edi,edx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] - WORD $0x8945; BYTE $0xd4 // mov r12d,r10d - - // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c) - LONG $0x6c244403 // add eax,[rsp+0x6c] - WORD $0x2145; BYTE $0xcc // and r12d,r9d - LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 - LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb - LONG $0x331c8d42 // lea ebx,[rbx+r14*1] - LONG $0x20048d42 // lea eax,[rax+r12*1] - LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 - LONG $0x20048d42 // lea eax,[rax+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xdf89 // mov edi,ebx - LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 - LONG $0x28048d42 // lea eax,[rax+r13*1] - WORD $0xcf31 // xor edi,ecx - LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd - LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 - LONG $0x00048d45 // lea r8d,[r8+rax*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xcf // xor r15d,ecx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d42 // lea eax,[rax+r15*1] - WORD $0x8945; BYTE $0xcc // mov r12d,r9d - - // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00) - LONG $0x241c0344 // add r11d,[rsp] - WORD $0x2145; BYTE $0xc4 // and r12d,r8d - LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 - LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb - LONG $0x30048d42 // lea eax,[rax+r14*1] - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xc7 // mov r15d,eax - LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 - LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] - WORD $0x3141; BYTE $0xdf // xor r15d,ebx - LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd - LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 - LONG $0x1a148d42 // lea edx,[rdx+r11*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xdf31 // xor edi,ebx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] - WORD $0x8945; BYTE $0xc4 // mov r12d,r8d - - // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04) - LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4] - WORD $0x2141; BYTE $0xd4 // and r12d,edx - LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 - LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb - LONG $0x331c8d47 // lea r11d,[r11+r14*1] - LONG $0x22148d47 // lea r10d,[r10+r12*1] - LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 - LONG $0x22148d47 // lea r10d,[r10+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xdf // mov edi,r11d - LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 - LONG $0x2a148d47 // lea r10d,[r10+r13*1] - WORD $0xc731 // xor edi,eax - LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd - LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 - LONG $0x110c8d42 // lea ecx,[rcx+r10*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xc7 // xor r15d,eax - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3a148d47 // lea r10d,[r10+r15*1] - WORD $0x8941; BYTE $0xd4 // mov r12d,edx - - // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08) - LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8] - WORD $0x2141; BYTE $0xcc // and r12d,ecx - LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 - LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb - LONG $0x32148d47 // lea r10d,[r10+r14*1] - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xd7 // mov r15d,r10d - LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 - LONG $0x290c8d47 // lea r9d,[r9+r13*1] - WORD $0x3145; BYTE $0xdf // xor r15d,r11d - LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd - LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 - LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xdf // xor edi,r11d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d45 // lea r9d,[r9+rdi*1] - WORD $0x8941; BYTE $0xcc // mov r12d,ecx - - // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c) - LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc] - WORD $0x2141; BYTE $0xdc // and r12d,ebx - LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 - LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb - LONG $0x310c8d47 // lea r9d,[r9+r14*1] - LONG $0x20048d47 // lea r8d,[r8+r12*1] - LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 - LONG $0x20048d47 // lea r8d,[r8+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xcf // mov edi,r9d - LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 - LONG $0x28048d47 // lea r8d,[r8+r13*1] - WORD $0x3144; BYTE $0xd7 // xor edi,r10d - LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd - LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 - LONG $0x00048d42 // lea eax,[rax+r8*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xd7 // xor r15d,r10d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d47 // lea r8d,[r8+r15*1] - WORD $0x8941; BYTE $0xdc // mov r12d,ebx - - // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20) - LONG $0x20245403 // add edx,[rsp+0x20] - WORD $0x2141; BYTE $0xc4 // and r12d,eax - LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 - LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb - LONG $0x30048d47 // lea r8d,[r8+r14*1] - LONG $0x22148d42 // lea edx,[rdx+r12*1] - LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 - LONG $0x22148d42 // lea edx,[rdx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xc7 // mov r15d,r8d - LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 - LONG $0x2a148d42 // lea edx,[rdx+r13*1] - WORD $0x3145; BYTE $0xcf // xor r15d,r9d - LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd - LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 - LONG $0x131c8d45 // lea r11d,[r11+rdx*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xcf // xor edi,r9d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] - WORD $0x8941; BYTE $0xc4 // mov r12d,eax - - // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24) - LONG $0x24244c03 // add ecx,[rsp+0x24] - WORD $0x2145; BYTE $0xdc // and r12d,r11d - LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 - LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb - LONG $0x32148d42 // lea edx,[rdx+r14*1] - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xd789 // mov edi,edx - LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 - LONG $0x290c8d42 // lea ecx,[rcx+r13*1] - WORD $0x3144; BYTE $0xc7 // xor edi,r8d - LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd - LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 - LONG $0x0a148d45 // lea r10d,[r10+rcx*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xc7 // xor r15d,r8d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d42 // lea ecx,[rcx+r15*1] - WORD $0x8945; BYTE $0xdc // mov r12d,r11d - - // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28) - LONG $0x28245c03 // add ebx,[rsp+0x28] - WORD $0x2145; BYTE $0xd4 // and r12d,r10d - LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 - LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb - LONG $0x310c8d42 // lea ecx,[rcx+r14*1] - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xcf // mov r15d,ecx - LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 - LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] - WORD $0x3141; BYTE $0xd7 // xor r15d,edx - LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd - LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 - LONG $0x190c8d45 // lea r9d,[r9+rbx*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xd731 // xor edi,edx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] - WORD $0x8945; BYTE $0xd4 // mov r12d,r10d - - // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c) - LONG $0x2c244403 // add eax,[rsp+0x2c] - WORD $0x2145; BYTE $0xcc // and r12d,r9d - LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 - LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb - LONG $0x331c8d42 // lea ebx,[rbx+r14*1] - LONG $0x20048d42 // lea eax,[rax+r12*1] - LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 - LONG $0x20048d42 // lea eax,[rax+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xdf89 // mov edi,ebx - LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 - LONG $0x28048d42 // lea eax,[rax+r13*1] - WORD $0xcf31 // xor edi,ecx - LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd - LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 - LONG $0x00048d45 // lea r8d,[r8+rax*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xcf // xor r15d,ecx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d42 // lea eax,[rax+r15*1] - WORD $0x8945; BYTE $0xcc // mov r12d,r9d - - MOVQ 0x200(SP), DI // $_ctx - ADDQ R14, AX - - LEAQ 0x1c0(SP), BP - - ADDL (DI), AX - ADDL 4(DI), BX - ADDL 8(DI), CX - ADDL 12(DI), DX - ADDL 16(DI), R8 - ADDL 20(DI), R9 - ADDL 24(DI), R10 - ADDL 28(DI), R11 - - MOVL AX, (DI) - MOVL BX, 4(DI) - MOVL CX, 8(DI) - MOVL DX, 12(DI) - MOVL R8, 16(DI) - MOVL R9, 20(DI) - MOVL R10, 24(DI) - MOVL R11, 28(DI) - - CMPQ SI, 0x50(BP) // $_end - JE done - - XORQ R14, R14 - MOVQ BX, DI - XORQ CX, DI // magic - MOVQ R9, R12 + // Schedule 48 input dwords, by doing 3 rounds of 12 each + // Note: SIMD instructions are interleaved with the SHA calculations + ADDQ $-0x40, SP + LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4 + + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) + LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) + LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) + LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50 + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) + LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0] + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 + LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4 + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) + LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) + LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) + LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50 + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) + LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20] + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 + + LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40] + LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4 + + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) + LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) + LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) + LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50 + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) + LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40] + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 + LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4 + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) + LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) + LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) + LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50 + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) + LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60] + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 + ADDQ $0x80, BP + + CMPB 0x3(BP), $0x0 + JNE loop1 + + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40) + LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44) + LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48) + LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c) + LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60) + LONG $0x60245403 // add edx,[rsp+0x60] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64) + LONG $0x64244c03 // add ecx,[rsp+0x64] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68) + LONG $0x68245c03 // add ebx,[rsp+0x68] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c) + LONG $0x6c244403 // add eax,[rsp+0x6c] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00) + LONG $0x241c0344 // add r11d,[rsp] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04) + LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08) + LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c) + LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20) + LONG $0x20245403 // add edx,[rsp+0x20] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24) + LONG $0x24244c03 // add ecx,[rsp+0x24] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28) + LONG $0x28245c03 // add ebx,[rsp+0x28] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c) + LONG $0x2c244403 // add eax,[rsp+0x2c] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + MOVQ 0x200(SP), DI // $_ctx + ADDQ R14, AX + + LEAQ 0x1c0(SP), BP + + ADDL (DI), AX + ADDL 4(DI), BX + ADDL 8(DI), CX + ADDL 12(DI), DX + ADDL 16(DI), R8 + ADDL 20(DI), R9 + ADDL 24(DI), R10 + ADDL 28(DI), R11 + + MOVL AX, (DI) + MOVL BX, 4(DI) + MOVL CX, 8(DI) + MOVL DX, 12(DI) + MOVL R8, 16(DI) + MOVL R9, 20(DI) + MOVL R10, 24(DI) + MOVL R11, 28(DI) + + CMPQ SI, 0x50(BP) // $_end + JE done + + XORQ R14, R14 + MOVQ BX, DI + XORQ CX, DI // magic + MOVQ R9, R12 loop2: - // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10) - LONG $0x105d0344 // add r11d,[rbp+0x10] - WORD $0x2145; BYTE $0xc4 // and r12d,r8d - LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 - LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb - LONG $0x30048d42 // lea eax,[rax+r14*1] - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 - LONG $0x231c8d47 // lea r11d,[r11+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xc7 // mov r15d,eax - LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 - LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] - WORD $0x3141; BYTE $0xdf // xor r15d,ebx - LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd - LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 - LONG $0x1a148d42 // lea edx,[rdx+r11*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xdf31 // xor edi,ebx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] - WORD $0x8945; BYTE $0xc4 // mov r12d,r8d - - // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14) - LONG $0x14550344 // add r10d,[rbp+0x14] - WORD $0x2141; BYTE $0xd4 // and r12d,edx - LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 - LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb - LONG $0x331c8d47 // lea r11d,[r11+r14*1] - LONG $0x22148d47 // lea r10d,[r10+r12*1] - LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 - LONG $0x22148d47 // lea r10d,[r10+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xdf // mov edi,r11d - LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 - LONG $0x2a148d47 // lea r10d,[r10+r13*1] - WORD $0xc731 // xor edi,eax - LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd - LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 - LONG $0x110c8d42 // lea ecx,[rcx+r10*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xc7 // xor r15d,eax - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x3a148d47 // lea r10d,[r10+r15*1] - WORD $0x8941; BYTE $0xd4 // mov r12d,edx - - // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18) - LONG $0x184d0344 // add r9d,[rbp+0x18] - WORD $0x2141; BYTE $0xcc // and r12d,ecx - LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 - LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb - LONG $0x32148d47 // lea r10d,[r10+r14*1] - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 - LONG $0x210c8d47 // lea r9d,[r9+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xd7 // mov r15d,r10d - LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 - LONG $0x290c8d47 // lea r9d,[r9+r13*1] - WORD $0x3145; BYTE $0xdf // xor r15d,r11d - LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd - LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 - LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xdf // xor edi,r11d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d45 // lea r9d,[r9+rdi*1] - WORD $0x8941; BYTE $0xcc // mov r12d,ecx - - // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c) - LONG $0x1c450344 // add r8d,[rbp+0x1c] - WORD $0x2141; BYTE $0xdc // and r12d,ebx - LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 - LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb - LONG $0x310c8d47 // lea r9d,[r9+r14*1] - LONG $0x20048d47 // lea r8d,[r8+r12*1] - LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 - LONG $0x20048d47 // lea r8d,[r8+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8944; BYTE $0xcf // mov edi,r9d - LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 - LONG $0x28048d47 // lea r8d,[r8+r13*1] - WORD $0x3144; BYTE $0xd7 // xor edi,r10d - LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd - LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 - LONG $0x00048d42 // lea eax,[rax+r8*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xd7 // xor r15d,r10d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d47 // lea r8d,[r8+r15*1] - WORD $0x8941; BYTE $0xdc // mov r12d,ebx - - // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30) - WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30] - WORD $0x2141; BYTE $0xc4 // and r12d,eax - LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 - LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb - LONG $0x30048d47 // lea r8d,[r8+r14*1] - LONG $0x22148d42 // lea edx,[rdx+r12*1] - LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 - LONG $0x22148d42 // lea edx,[rdx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8945; BYTE $0xc7 // mov r15d,r8d - LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 - LONG $0x2a148d42 // lea edx,[rdx+r13*1] - WORD $0x3145; BYTE $0xcf // xor r15d,r9d - LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd - LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 - LONG $0x131c8d45 // lea r11d,[r11+rdx*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3144; BYTE $0xcf // xor edi,r9d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] - WORD $0x8941; BYTE $0xc4 // mov r12d,eax - - // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34) - WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34] - WORD $0x2145; BYTE $0xdc // and r12d,r11d - LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 - LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb - LONG $0x32148d42 // lea edx,[rdx+r14*1] - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 - LONG $0x210c8d42 // lea ecx,[rcx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xd789 // mov edi,edx - LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 - LONG $0x290c8d42 // lea ecx,[rcx+r13*1] - WORD $0x3144; BYTE $0xc7 // xor edi,r8d - LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd - LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 - LONG $0x0a148d45 // lea r10d,[r10+rcx*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3145; BYTE $0xc7 // xor r15d,r8d - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x390c8d42 // lea ecx,[rcx+r15*1] - WORD $0x8945; BYTE $0xdc // mov r12d,r11d - - // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38) - WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38] - WORD $0x2145; BYTE $0xd4 // and r12d,r10d - LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 - LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb - LONG $0x310c8d42 // lea ecx,[rcx+r14*1] - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax - WORD $0x3145; BYTE $0xfd // xor r13d,r15d - LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 - LONG $0x231c8d42 // lea ebx,[rbx+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0x8941; BYTE $0xcf // mov r15d,ecx - LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 - LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] - WORD $0x3141; BYTE $0xd7 // xor r15d,edx - LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd - LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 - LONG $0x190c8d45 // lea r9d,[r9+rbx*1] - WORD $0x2144; BYTE $0xff // and edi,r15d - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0xd731 // xor edi,edx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] - WORD $0x8945; BYTE $0xd4 // mov r12d,r10d - - // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c) - WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c] - WORD $0x2145; BYTE $0xcc // and r12d,r9d - LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 - LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb - LONG $0x331c8d42 // lea ebx,[rbx+r14*1] - LONG $0x20048d42 // lea eax,[rax+r12*1] - LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d - WORD $0x3141; BYTE $0xfd // xor r13d,edi - LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 - LONG $0x20048d42 // lea eax,[rax+r12*1] - WORD $0x3145; BYTE $0xf5 // xor r13d,r14d - WORD $0xdf89 // mov edi,ebx - LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 - LONG $0x28048d42 // lea eax,[rax+r13*1] - WORD $0xcf31 // xor edi,ecx - LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd - LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 - LONG $0x00048d45 // lea r8d,[r8+rax*1] - WORD $0x2141; BYTE $0xff // and r15d,edi - WORD $0x3145; BYTE $0xe6 // xor r14d,r12d - WORD $0x3141; BYTE $0xcf // xor r15d,ecx - WORD $0x3145; BYTE $0xee // xor r14d,r13d - LONG $0x38048d42 // lea eax,[rax+r15*1] - WORD $0x8945; BYTE $0xcc // mov r12d,r9d - - ADDQ $-0x40, BP - CMPQ BP, SP - JAE loop2 - - MOVQ 0x200(SP), DI // $_ctx - ADDQ R14, AX - - ADDQ $0x1c0, SP - - ADDL (DI), AX - ADDL 4(DI), BX - ADDL 8(DI), CX - ADDL 12(DI), DX - ADDL 16(DI), R8 - ADDL 20(DI), R9 - - ADDQ $0x80, SI // input += 2 - ADDL 24(DI), R10 - MOVQ SI, R12 - ADDL 28(DI), R11 - CMPQ SI, 0x50(SP) // input == _end - - MOVL AX, (DI) - LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */ - MOVL AX, (DI) - MOVL BX, 4(DI) - MOVL CX, 8(DI) - MOVL DX, 12(DI) - MOVL R8, 16(DI) - MOVL R9, 20(DI) - MOVL R10, 24(DI) - MOVL R11, 28(DI) - - JBE loop0 - LEAQ (SP), BP + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10) + LONG $0x105d0344 // add r11d,[rbp+0x10] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14) + LONG $0x14550344 // add r10d,[rbp+0x14] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18) + LONG $0x184d0344 // add r9d,[rbp+0x18] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c) + LONG $0x1c450344 // add r8d,[rbp+0x1c] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30) + WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34) + WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38) + WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c) + WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + ADDQ $-0x40, BP + CMPQ BP, SP + JAE loop2 + + MOVQ 0x200(SP), DI // $_ctx + ADDQ R14, AX + + ADDQ $0x1c0, SP + + ADDL (DI), AX + ADDL 4(DI), BX + ADDL 8(DI), CX + ADDL 12(DI), DX + ADDL 16(DI), R8 + ADDL 20(DI), R9 + + ADDQ $0x80, SI // input += 2 + ADDL 24(DI), R10 + MOVQ SI, R12 + ADDL 28(DI), R11 + CMPQ SI, 0x50(SP) // input == _end + + MOVL AX, (DI) + LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */ + MOVL AX, (DI) + MOVL BX, 4(DI) + MOVL CX, 8(DI) + MOVL DX, 12(DI) + MOVL R8, 16(DI) + MOVL R9, 20(DI) + MOVL R10, 24(DI) + MOVL R11, 28(DI) + + JBE loop0 + LEAQ (SP), BP done: - MOVQ BP, SP - MOVQ 0x58(SP), SP - WORD $0xf8c5; BYTE $0x77 // vzeroupper - - RET - + MOVQ BP, SP + MOVQ 0x58(SP), SP // restore saved stack pointer + WORD $0xf8c5; BYTE $0x77 // vzeroupper + + RET + diff --git a/sha256blockAvx512_amd64.go b/sha256blockAvx512_amd64.go index 1bf45fd..db8e48d 100644 --- a/sha256blockAvx512_amd64.go +++ b/sha256blockAvx512_amd64.go @@ -1,4 +1,4 @@ -//+build !noasm +//+build !noasm,!appengine /* * Minio Cloud Storage, (C) 2017 Minio, Inc. @@ -28,20 +28,21 @@ ) //go:noescape -func sha256_x16_avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte) - -// Do not start at 0 but next multiple of 16 so as to be able to +func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte) + +// Avx512ServerUID - Do not start at 0 but next multiple of 16 so as to be able to // differentiate with default initialiation value of 0 -const Avx512ServerUid = 16 +const Avx512ServerUID = 16 var uidCounter uint64 +// NewAvx512 - initialize sha256 Avx512 implementation. func NewAvx512(a512srv *Avx512Server) hash.Hash { uid := atomic.AddUint64(&uidCounter, 1) return &Avx512Digest{uid: uid, a512srv: a512srv} } -// Type for computing SHA256 using AVX51 +// Avx512Digest - Type for computing SHA256 using Avx512 type Avx512Digest struct { uid uint64 a512srv *Avx512Server @@ -52,12 +53,13 @@ result [Size]byte } -// Return size of checksum +// Size - Return size of checksum func (d *Avx512Digest) Size() int { return Size } -// Return blocksize of checksum +// BlockSize - Return blocksize of checksum func (d Avx512Digest) BlockSize() int { return BlockSize } +// Reset - reset sha digest to its initial values func (d *Avx512Digest) Reset() { d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true} d.nx = 0 @@ -69,7 +71,7 @@ func (d *Avx512Digest) Write(p []byte) (nn int, err error) { if d.final { - return 0, errors.New("Avx512Digest already finalized. Reset first before writing again.") + return 0, errors.New("Avx512Digest already finalized. Reset first before writing again") } nn = len(p) @@ -94,7 +96,7 @@ return } -// Return sha256 sum in bytes +// Sum - Return sha256 sum in bytes func (d *Avx512Digest) Sum(in []byte) (result []byte) { if d.final { @@ -102,15 +104,16 @@ } trail := make([]byte, 0, 128) + trail = append(trail, d.x[:d.nx]...) len := d.len // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. var tmp [64]byte tmp[0] = 0x80 if len%64 < 56 { - trail = append(d.x[:d.nx], tmp[0:56-len%64]...) + trail = append(trail, tmp[0:56-len%64]...) } else { - trail = append(d.x[:d.nx], tmp[0:64+56-len%64]...) + trail = append(trail, tmp[0:64+56-len%64]...) } d.nx = 0 @@ -262,7 +265,7 @@ func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte { scratch := [512]byte{} - sha256_x16_avx512(digests, &scratch, &table, mask, input) + sha256X16Avx512(digests, &scratch, &table, mask, input) output := [16][Size]byte{} for i := 0; i < 16; i++ { @@ -290,7 +293,7 @@ sumCh chan [Size]byte } -// Type to implement 16x parallel handling of SHA256 invocations +// Avx512Server - Type to implement 16x parallel handling of SHA256 invocations type Avx512Server struct { blocksCh chan blockInput // Input channel totalIn int // Total number of inputs waiting to be processed @@ -298,14 +301,14 @@ digests map[uint64][Size]byte // Map of uids to (interim) digest results } -// Info for each lane +// Avx512LaneInfo - Info for each lane type Avx512LaneInfo struct { uid uint64 // unique identification for this SHA processing block []byte // input block to be processed outputCh chan [Size]byte // channel for output result } -// Create new object for parallel processing handling +// NewAvx512Server - Create new object for parallel processing handling func NewAvx512Server() *Avx512Server { a512srv := &Avx512Server{} a512srv.digests = make(map[uint64][Size]byte) @@ -316,7 +319,7 @@ return a512srv } -// Sole handler for reading from the input channel +// Process - Sole handler for reading from the input channel func (a512srv *Avx512Server) Process() { for { select { @@ -363,7 +366,7 @@ if lane.uid == uid { if lane.block != nil { a512srv.lanes[i] = Avx512LaneInfo{} // clear message - a512srv.totalIn -= 1 + a512srv.totalIn-- } } } @@ -373,7 +376,7 @@ } // Invoke assembly and send results back -func (a512srv *Avx512Server) blocks() (err error) { +func (a512srv *Avx512Server) blocks() { inputs := [16][]byte{} for i := range inputs { @@ -395,7 +398,6 @@ delete(a512srv.digests, uid) // Delete entry from hashmap } } - return } func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) { @@ -403,6 +405,7 @@ return len(p), nil } +// Sum - return sha256 sum in bytes for a given sum id. func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte { sumCh := make(chan [32]byte) a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh} diff --git a/sha256blockAvx512_amd64.s b/sha256blockAvx512_amd64.s index a6733f9..275bcac 100644 --- a/sha256blockAvx512_amd64.s +++ b/sha256blockAvx512_amd64.s @@ -1,207 +1,243 @@ -TEXT ·sha256_x16_avx512(SB), 7, $0 - MOVQ digests+0(FP), DI - MOVQ scratch+8(FP), R12 - MOVQ mask_len+32(FP), SI - MOVQ r14+24(FP), R13 - MOVQ (R13), R14 - LONG $0x92fbc1c4; BYTE $0xce - LEAQ inputs+48(FP), AX - QUAD $0xf162076f487ef162; QUAD $0x7ef162014f6f487e; QUAD $0x487ef16202576f48; QUAD $0x6f487ef162035f6f; QUAD $0x6f6f487ef1620467; QUAD $0x06776f487ef16205; LONG $0x487ef162; WORD $0x7f6f; BYTE $0x07 - MOVQ table+16(FP), DX - WORD $0x3148; BYTE $0xc9 +//+build !noasm,!appengine + +TEXT ·sha256X16Avx512(SB), 7, $0 + MOVQ digests+0(FP), DI + MOVQ scratch+8(FP), R12 + MOVQ mask_len+32(FP), SI + MOVQ mask_base+24(FP), R13 + MOVQ (R13), R14 + LONG $0x92fbc1c4; BYTE $0xce + LEAQ inputs+48(FP), AX + QUAD $0xf162076f487ef162; QUAD $0x7ef162014f6f487e; QUAD $0x487ef16202576f48; QUAD $0x6f487ef162035f6f; QUAD $0x6f6f487ef1620467; QUAD $0x06776f487ef16205; LONG $0x487ef162; WORD $0x7f6f; BYTE $0x07 + MOVQ table+16(FP), DX + WORD $0x3148; BYTE $0xc9 TESTQ $(1<<0), R14 - JE skipInput0 - MOVQ 0*24(AX), R9 - LONG $0x487cc162; WORD $0x0410; BYTE $0x09 + JE skipInput0 + MOVQ 0*24(AX), R9 + LONG $0x487cc162; WORD $0x0410; BYTE $0x09 + skipInput0: TESTQ $(1<<1), R14 - JE skipInput1 - MOVQ 1*24(AX), R9 - LONG $0x487cc162; WORD $0x0c10; BYTE $0x09 + JE skipInput1 + MOVQ 1*24(AX), R9 + LONG $0x487cc162; WORD $0x0c10; BYTE $0x09 + skipInput1: TESTQ $(1<<2), R14 - JE skipInput2 - MOVQ 2*24(AX), R9 - LONG $0x487cc162; WORD $0x1410; BYTE $0x09 + JE skipInput2 + MOVQ 2*24(AX), R9 + LONG $0x487cc162; WORD $0x1410; BYTE $0x09 + skipInput2: TESTQ $(1<<3), R14 - JE skipInput3 - MOVQ 3*24(AX), R9 - LONG $0x487cc162; WORD $0x1c10; BYTE $0x09 + JE skipInput3 + MOVQ 3*24(AX), R9 + LONG $0x487cc162; WORD $0x1c10; BYTE $0x09 + skipInput3: TESTQ $(1<<4), R14 - JE skipInput4 - MOVQ 4*24(AX), R9 - LONG $0x487cc162; WORD $0x2410; BYTE $0x09 + JE skipInput4 + MOVQ 4*24(AX), R9 + LONG $0x487cc162; WORD $0x2410; BYTE $0x09 + skipInput4: TESTQ $(1<<5), R14 - JE skipInput5 - MOVQ 5*24(AX), R9 - LONG $0x487cc162; WORD $0x2c10; BYTE $0x09 + JE skipInput5 + MOVQ 5*24(AX), R9 + LONG $0x487cc162; WORD $0x2c10; BYTE $0x09 + skipInput5: TESTQ $(1<<6), R14 - JE skipInput6 - MOVQ 6*24(AX), R9 - LONG $0x487cc162; WORD $0x3410; BYTE $0x09 + JE skipInput6 + MOVQ 6*24(AX), R9 + LONG $0x487cc162; WORD $0x3410; BYTE $0x09 + skipInput6: TESTQ $(1<<7), R14 - JE skipInput7 - MOVQ 7*24(AX), R9 - LONG $0x487cc162; WORD $0x3c10; BYTE $0x09 + JE skipInput7 + MOVQ 7*24(AX), R9 + LONG $0x487cc162; WORD $0x3c10; BYTE $0x09 + skipInput7: TESTQ $(1<<8), R14 - JE skipInput8 - MOVQ 8*24(AX), R9 - LONG $0x487c4162; WORD $0x0410; BYTE $0x09 + JE skipInput8 + MOVQ 8*24(AX), R9 + LONG $0x487c4162; WORD $0x0410; BYTE $0x09 + skipInput8: TESTQ $(1<<9), R14 - JE skipInput9 - MOVQ 9*24(AX), R9 - LONG $0x487c4162; WORD $0x0c10; BYTE $0x09 + JE skipInput9 + MOVQ 9*24(AX), R9 + LONG $0x487c4162; WORD $0x0c10; BYTE $0x09 + skipInput9: TESTQ $(1<<10), R14 - JE skipInput10 - MOVQ 10*24(AX), R9 - LONG $0x487c4162; WORD $0x1410; BYTE $0x09 + JE skipInput10 + MOVQ 10*24(AX), R9 + LONG $0x487c4162; WORD $0x1410; BYTE $0x09 + skipInput10: TESTQ $(1<<11), R14 - JE skipInput11 - MOVQ 11*24(AX), R9 - LONG $0x487c4162; WORD $0x1c10; BYTE $0x09 + JE skipInput11 + MOVQ 11*24(AX), R9 + LONG $0x487c4162; WORD $0x1c10; BYTE $0x09 + skipInput11: TESTQ $(1<<12), R14 - JE skipInput12 - MOVQ 12*24(AX), R9 - LONG $0x487c4162; WORD $0x2410; BYTE $0x09 + JE skipInput12 + MOVQ 12*24(AX), R9 + LONG $0x487c4162; WORD $0x2410; BYTE $0x09 + skipInput12: TESTQ $(1<<13), R14 - JE skipInput13 - MOVQ 13*24(AX), R9 - LONG $0x487c4162; WORD $0x2c10; BYTE $0x09 + JE skipInput13 + MOVQ 13*24(AX), R9 + LONG $0x487c4162; WORD $0x2c10; BYTE $0x09 + skipInput13: TESTQ $(1<<14), R14 - JE skipInput14 - MOVQ 14*24(AX), R9 - LONG $0x487c4162; WORD $0x3410; BYTE $0x09 + JE skipInput14 + MOVQ 14*24(AX), R9 + LONG $0x487c4162; WORD $0x3410; BYTE $0x09 + skipInput14: TESTQ $(1<<15), R14 - JE skipInput15 - MOVQ 15*24(AX), R9 - LONG $0x487c4162; WORD $0x3c10; BYTE $0x09 + JE skipInput15 + MOVQ 15*24(AX), R9 + LONG $0x487c4162; WORD $0x3c10; BYTE $0x09 + skipInput15: lloop: - LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), DX - LONG $0x487e7162; WORD $0x1a6f - MOVQ table+16(FP), DX - QUAD $0xd162226f487e7162; QUAD $0x7ed16224047f487e; QUAD $0x7ed16201244c7f48; QUAD $0x7ed1620224547f48; QUAD $0x7ed16203245c7f48; QUAD $0x7ed1620424647f48; QUAD $0x7ed16205246c7f48; QUAD $0x7ed1620624747f48; QUAD $0xc1834807247c7f48; QUAD $0x44c9c6407c316240; QUAD $0x62eec1c6407ca162; QUAD $0xa16244d3c6406c31; QUAD $0x34c162eed3c6406c; QUAD $0x407ca162dddac648; QUAD $0xc6407ca16288cac6; QUAD $0xcac648345162ddc2; QUAD $0x44d5c6405ca16288; QUAD $0x62eee5c6405ca162; QUAD $0xa16244d7c6404c31; QUAD $0x6cc162eef7c6404c; QUAD $0x405ca162ddfac640; QUAD $0xc6405ca16288eec6; QUAD $0xd2c6406cc162dde6; QUAD $0x44f1c6403c816288; QUAD $0x62eec1c6403c0162; QUAD $0x016244d3c6402c11; QUAD $0x4c4162eed3c6402c; QUAD $0x403c0162dddac640; QUAD $0xc6403c016288cac6; QUAD $0xf2c6404cc162ddc2; QUAD $0x44d5c6401c016288; QUAD $0x62eee5c6401c0162; QUAD $0x016244d7c6400c11; QUAD $0x2c4162eef7c6400c; QUAD $0x401c0162ddfac640; QUAD $0xc6401c016288eec6; QUAD $0xd2c6402c4162dde6; BYTE $0x88 - LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX - LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8 - QUAD $0x2262336f487e6162; QUAD $0x487e5162f27648b5; QUAD $0xd27648b53262106f; QUAD $0xa262136f487ee162; QUAD $0x487e5162d77640e5; QUAD $0xcf7640e53262086f; QUAD $0xa2621b6f487ee162; QUAD $0x487ec162dd7640f5; QUAD $0xfd7640f5a262386f; QUAD $0xa2620b6f487ee162; QUAD $0x487ec162cc7640fd; QUAD $0xec7640fda262286f; QUAD $0x8262036f487ee162; QUAD $0x487ec162c27640cd; QUAD $0xe27640cd8262206f; QUAD $0x8262336f487ee162; QUAD $0x487e4162f77640a5; QUAD $0xd77640a50262106f; QUAD $0x02621b6f487e6162; QUAD $0x487e4162dd7640b5; QUAD $0xfd7640b50262386f; QUAD $0x02620b6f487e6162; QUAD $0x487e4162cc7640bd; QUAD $0xec7640bd0262286f; QUAD $0x62eec023408d2362; QUAD $0x236244c023408da3; QUAD $0xada362eee42348ad; QUAD $0x40c5036244e42348; QUAD $0x2340c51362eef723; QUAD $0xfd2340d5036244d7; QUAD $0x44fd2340d58362ee; QUAD $0x62eeea2348b50362; QUAD $0x036244ea2348b583; QUAD $0xe51362eed32340e5; QUAD $0x40f5036244cb2340; QUAD $0x2340f58362eed923; QUAD $0xce2340ed236244d9; QUAD $0x44ce2340eda362ee; QUAD $0xc162d16f487ec162; QUAD $0x407dc262f26f487e; QUAD $0xcb004075c262c300; QUAD $0xc262d300406dc262; QUAD $0x405dc262db004065; QUAD $0xeb004055c262e300; QUAD $0xc262f300404dc262; QUAD $0x403d4262fb004045; QUAD $0xcb0040354262c300; QUAD $0x4262d300402d4262; QUAD $0x401d4262db004025; QUAD $0xeb0040154262e300; QUAD $0x4262f300400d4262; QUAD $0x48455162fb004005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6201626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916202626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16203; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16204626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16205626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x06626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16207626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1620862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6209626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1620a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591620b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91620c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591620d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x0e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591620f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591621062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6211626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916212626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16213; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16214626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16215626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x16626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16217626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1621862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6219626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1621a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591621b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91621c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591621d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x1e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591621f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591622062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6221626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916222626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16223; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16224626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16225626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x26626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16227626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1622862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6229626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1622a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591622b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91622c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591622d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x2e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591622f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591623062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x01ee8348fdfe4005 - JE lastLoop - ADDQ $8, R13 - MOVQ (R13), R14 - QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x31 - TESTQ $(1<<0), R14 - JE skipNext0 - MOVQ 0*24(AX), R9 - LONG $0x487cc162; WORD $0x0410; BYTE $0x09 + LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), DX + LONG $0x487e7162; WORD $0x1a6f + MOVQ table+16(FP), DX + QUAD $0xd162226f487e7162; QUAD $0x7ed16224047f487e; QUAD $0x7ed16201244c7f48; QUAD $0x7ed1620224547f48; QUAD $0x7ed16203245c7f48; QUAD $0x7ed1620424647f48; QUAD $0x7ed16205246c7f48; QUAD $0x7ed1620624747f48; QUAD $0xc1834807247c7f48; QUAD $0x44c9c6407c316240; QUAD $0x62eec1c6407ca162; QUAD $0xa16244d3c6406c31; QUAD $0x34c162eed3c6406c; QUAD $0x407ca162dddac648; QUAD $0xc6407ca16288cac6; QUAD $0xcac648345162ddc2; QUAD $0x44d5c6405ca16288; QUAD $0x62eee5c6405ca162; QUAD $0xa16244d7c6404c31; QUAD $0x6cc162eef7c6404c; QUAD $0x405ca162ddfac640; QUAD $0xc6405ca16288eec6; QUAD $0xd2c6406cc162dde6; QUAD $0x44f1c6403c816288; QUAD $0x62eec1c6403c0162; QUAD $0x016244d3c6402c11; QUAD $0x4c4162eed3c6402c; QUAD $0x403c0162dddac640; QUAD $0xc6403c016288cac6; QUAD $0xf2c6404cc162ddc2; QUAD $0x44d5c6401c016288; QUAD $0x62eee5c6401c0162; QUAD $0x016244d7c6400c11; QUAD $0x2c4162eef7c6400c; QUAD $0x401c0162ddfac640; QUAD $0xc6401c016288eec6; QUAD $0xd2c6402c4162dde6; BYTE $0x88 + LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX + LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8 + QUAD $0x2262336f487e6162; QUAD $0x487e5162f27648b5; QUAD $0xd27648b53262106f; QUAD $0xa262136f487ee162; QUAD $0x487e5162d77640e5; QUAD $0xcf7640e53262086f; QUAD $0xa2621b6f487ee162; QUAD $0x487ec162dd7640f5; QUAD $0xfd7640f5a262386f; QUAD $0xa2620b6f487ee162; QUAD $0x487ec162cc7640fd; QUAD $0xec7640fda262286f; QUAD $0x8262036f487ee162; QUAD $0x487ec162c27640cd; QUAD $0xe27640cd8262206f; QUAD $0x8262336f487ee162; QUAD $0x487e4162f77640a5; QUAD $0xd77640a50262106f; QUAD $0x02621b6f487e6162; QUAD $0x487e4162dd7640b5; QUAD $0xfd7640b50262386f; QUAD $0x02620b6f487e6162; QUAD $0x487e4162cc7640bd; QUAD $0xec7640bd0262286f; QUAD $0x62eec023408d2362; QUAD $0x236244c023408da3; QUAD $0xada362eee42348ad; QUAD $0x40c5036244e42348; QUAD $0x2340c51362eef723; QUAD $0xfd2340d5036244d7; QUAD $0x44fd2340d58362ee; QUAD $0x62eeea2348b50362; QUAD $0x036244ea2348b583; QUAD $0xe51362eed32340e5; QUAD $0x40f5036244cb2340; QUAD $0x2340f58362eed923; QUAD $0xce2340ed236244d9; QUAD $0x44ce2340eda362ee; QUAD $0xc162d16f487ec162; QUAD $0x407dc262f26f487e; QUAD $0xcb004075c262c300; QUAD $0xc262d300406dc262; QUAD $0x405dc262db004065; QUAD $0xeb004055c262e300; QUAD $0xc262f300404dc262; QUAD $0x403d4262fb004045; QUAD $0xcb0040354262c300; QUAD $0x4262d300402d4262; QUAD $0x401d4262db004025; QUAD $0xeb0040154262e300; QUAD $0x4262f300400d4262; QUAD $0x48455162fb004005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6201626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916202626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16203; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16204626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16205626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x06626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16207626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1620862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6209626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1620a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591620b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91620c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591620d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x0e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591620f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591621062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6211626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916212626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16213; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16214626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16215626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x16626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16217626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1621862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6219626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1621a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591621b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91621c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591621d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x1e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591621f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591622062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6221626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916222626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16223; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16224626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16225626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x26626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16227626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1622862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6229626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1622a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591622b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91622c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591622d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x2e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591622f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591623062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x01ee8348fdfe4005 + JE lastLoop + ADDQ $8, R13 + MOVQ (R13), R14 + QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x31 + TESTQ $(1<<0), R14 + JE skipNext0 + MOVQ 0*24(AX), R9 + LONG $0x487cc162; WORD $0x0410; BYTE $0x09 + skipNext0: - QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x32 + QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x32 TESTQ $(1<<1), R14 - JE skipNext1 - MOVQ 1*24(AX), R9 - LONG $0x487cc162; WORD $0x0c10; BYTE $0x09 + JE skipNext1 + MOVQ 1*24(AX), R9 + LONG $0x487cc162; WORD $0x0c10; BYTE $0x09 + skipNext1: - QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x33 + QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x33 TESTQ $(1<<2), R14 - JE skipNext2 - MOVQ 2*24(AX), R9 - LONG $0x487cc162; WORD $0x1410; BYTE $0x09 + JE skipNext2 + MOVQ 2*24(AX), R9 + LONG $0x487cc162; WORD $0x1410; BYTE $0x09 + skipNext2: - QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x34 + QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x34 TESTQ $(1<<3), R14 - JE skipNext3 - MOVQ 3*24(AX), R9 - LONG $0x487cc162; WORD $0x1c10; BYTE $0x09 + JE skipNext3 + MOVQ 3*24(AX), R9 + LONG $0x487cc162; WORD $0x1c10; BYTE $0x09 + skipNext3: - QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x35 + QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x35 TESTQ $(1<<4), R14 - JE skipNext4 - MOVQ 4*24(AX), R9 - LONG $0x487cc162; WORD $0x2410; BYTE $0x09 + JE skipNext4 + MOVQ 4*24(AX), R9 + LONG $0x487cc162; WORD $0x2410; BYTE $0x09 + skipNext4: - QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x36 + QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x36 TESTQ $(1<<5), R14 - JE skipNext5 - MOVQ 5*24(AX), R9 - LONG $0x487cc162; WORD $0x2c10; BYTE $0x09 + JE skipNext5 + MOVQ 5*24(AX), R9 + LONG $0x487cc162; WORD $0x2c10; BYTE $0x09 + skipNext5: - QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x37 + QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x37 TESTQ $(1<<6), R14 - JE skipNext6 - MOVQ 6*24(AX), R9 - LONG $0x487cc162; WORD $0x3410; BYTE $0x09 + JE skipNext6 + MOVQ 6*24(AX), R9 + LONG $0x487cc162; WORD $0x3410; BYTE $0x09 + skipNext6: - QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x38 + QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x38 TESTQ $(1<<7), R14 - JE skipNext7 - MOVQ 7*24(AX), R9 - LONG $0x487cc162; WORD $0x3c10; BYTE $0x09 + JE skipNext7 + MOVQ 7*24(AX), R9 + LONG $0x487cc162; WORD $0x3c10; BYTE $0x09 + skipNext7: - QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x39 + QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x39 TESTQ $(1<<8), R14 - JE skipNext8 - MOVQ 8*24(AX), R9 - LONG $0x487c4162; WORD $0x0410; BYTE $0x09 + JE skipNext8 + MOVQ 8*24(AX), R9 + LONG $0x487c4162; WORD $0x0410; BYTE $0x09 + skipNext8: - QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x3a + QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x3a TESTQ $(1<<9), R14 - JE skipNext9 - MOVQ 9*24(AX), R9 - LONG $0x487c4162; WORD $0x0c10; BYTE $0x09 + JE skipNext9 + MOVQ 9*24(AX), R9 + LONG $0x487c4162; WORD $0x0c10; BYTE $0x09 + skipNext9: - QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x3b + QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x3b TESTQ $(1<<10), R14 - JE skipNext10 - MOVQ 10*24(AX), R9 - LONG $0x487c4162; WORD $0x1410; BYTE $0x09 + JE skipNext10 + MOVQ 10*24(AX), R9 + LONG $0x487c4162; WORD $0x1410; BYTE $0x09 + skipNext10: - QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x3c + QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x3c TESTQ $(1<<11), R14 - JE skipNext11 - MOVQ 11*24(AX), R9 - LONG $0x487c4162; WORD $0x1c10; BYTE $0x09 + JE skipNext11 + MOVQ 11*24(AX), R9 + LONG $0x487c4162; WORD $0x1c10; BYTE $0x09 + skipNext11: - QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x3d + QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x3d TESTQ $(1<<12), R14 - JE skipNext12 - MOVQ 12*24(AX), R9 - LONG $0x487c4162; WORD $0x2410; BYTE $0x09 + JE skipNext12 + MOVQ 12*24(AX), R9 + LONG $0x487c4162; WORD $0x2410; BYTE $0x09 + skipNext12: - QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x3e + QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x3e TESTQ $(1<<13), R14 - JE skipNext13 - MOVQ 13*24(AX), R9 - LONG $0x487c4162; WORD $0x2c10; BYTE $0x09 + JE skipNext13 + MOVQ 13*24(AX), R9 + LONG $0x487c4162; WORD $0x2c10; BYTE $0x09 + skipNext13: - QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x3f + QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x3f TESTQ $(1<<14), R14 - JE skipNext14 - MOVQ 14*24(AX), R9 - LONG $0x487c4162; WORD $0x3410; BYTE $0x09 + JE skipNext14 + MOVQ 14*24(AX), R9 + LONG $0x487c4162; WORD $0x3410; BYTE $0x09 + skipNext14: - QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x40 + QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x40 TESTQ $(1<<15), R14 - JE skipNext15 - MOVQ 15*24(AX), R9 - LONG $0x487c4162; WORD $0x3c10; BYTE $0x09 + JE skipNext15 + MOVQ 15*24(AX), R9 + LONG $0x487c4162; WORD $0x3c10; BYTE $0x09 + skipNext15: QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0xc4fbfe4945d16207; LONG $0xce92fbc1 - JMP lloop + JMP lloop + lastLoop: - QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516231626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d3162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x516232626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d516233; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x4865516234626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d3162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x6235626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623662; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d516237626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d3162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x38626f487e7162c0; QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516239626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d1162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x51623a626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d51623b; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x486551623c626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d1162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x623d626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623e62; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d51623f626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d1162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x40626f487e7162c0; QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0x62fbfe4945d16207; QUAD $0x7ef162077f487ef1; QUAD $0x487ef162014f7f48; QUAD $0x7f487ef16202577f; QUAD $0x677f487ef162035f; QUAD $0x056f7f487ef16204; QUAD $0x6206777f487ef162; LONG $0x7f487ef1; WORD $0x077f + QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516231626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d3162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x516232626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d516233; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x4865516234626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d3162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x6235626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623662; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d516237626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d3162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x38626f487e7162c0; QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516239626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d1162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x51623a626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d51623b; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x486551623c626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d1162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x623d626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623e62; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d51623f626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d1162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x40626f487e7162c0; QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0x62fbfe4945d16207; QUAD $0x7ef162077f487ef1; QUAD $0x487ef162014f7f48; QUAD $0x7f487ef16202577f; QUAD $0x677f487ef162035f; QUAD $0x056f7f487ef16204; QUAD $0x6206777f487ef162; LONG $0x7f487ef1; WORD $0x077f VZEROUPPER RET + DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x000(SB)/8, $0x0405060700010203 DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x008(SB)/8, $0x0c0d0e0f08090a0b DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x010(SB)/8, $0x0405060700010203 diff --git a/sha256blockAvx512_amd64_test.go b/sha256blockAvx512_amd64_test.go index 807b7f7..bab089c 100644 --- a/sha256blockAvx512_amd64_test.go +++ b/sha256blockAvx512_amd64_test.go @@ -1,3 +1,5 @@ +//+build !noasm,!appengine + /* * Minio Cloud Storage, (C) 2017 Minio, Inc. * @@ -210,14 +212,14 @@ for i := 0; i < 16; i++ { input := make([]byte, 64) copy(input, golden[offset+i].in) - server.Write(uint64(Avx512ServerUid+i), input) + server.Write(uint64(Avx512ServerUID+i), input) } // Second block of 64 bytes for i := 0; i < 16; i++ { input := make([]byte, 64) copy(input, golden[offset+i].in[64:]) - server.Write(uint64(Avx512ServerUid+i), input) + server.Write(uint64(Avx512ServerUID+i), input) } wg := sync.WaitGroup{} @@ -241,7 +243,7 @@ t.Fatalf("Sum256 function: sha256(%s) = %s want %s", golden[offset+i].in, hex.EncodeToString(output[:]), hex.EncodeToString(golden[offset+i].out[:])) } wg.Done() - }(i, uint64(Avx512ServerUid+i), input) + }(i, uint64(Avx512ServerUID+i), input) } wg.Wait() diff --git a/sha256blockAvx_amd64.go b/sha256blockAvx_amd64.go index eb8a0ff..c2f7118 100644 --- a/sha256blockAvx_amd64.go +++ b/sha256blockAvx_amd64.go @@ -1,4 +1,4 @@ -//+build !noasm +//+build !noasm,!appengine /* * Minio Cloud Storage, (C) 2016 Minio, Inc. diff --git a/sha256blockAvx_amd64.s b/sha256blockAvx_amd64.s index 512e3cf..9f444d4 100644 --- a/sha256blockAvx_amd64.s +++ b/sha256blockAvx_amd64.s @@ -1,4 +1,4 @@ -//+build !noasm !appengine +//+build !noasm,!appengine // SHA256 implementation for AVX @@ -35,330 +35,329 @@ #include "textflag.h" #define ROTATE_XS \ - MOVOU X4, X15 \ - MOVOU X5, X4 \ - MOVOU X6, X5 \ - MOVOU X7, X6 \ - MOVOU X15, X7 + MOVOU X4, X15 \ + MOVOU X5, X4 \ + MOVOU X6, X5 \ + MOVOU X7, X6 \ + MOVOU X15, X7 // compute s0 four at a time and s1 two at a time // compute W[-16] + W[-7] 4 at a time #define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h) \ - MOVL e, R13 \ /* y0 = e */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - MOVL a, R14 \ /* y1 = a */ - LONG $0x0f41e3c4; WORD $0x04c6 \ // VPALIGNR XMM0,XMM7,XMM6,0x4 /* XTMP0 = W[-7] */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ - MOVL f, R15 \ /* y2 = f */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ - XORL g, R15 \ /* y2 = f^g */ - LONG $0xc4fef9c5 \ // VPADDD XMM0,XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ - XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) */ - ANDL e, R15 \ /* y2 = (f^g)&e */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - \ /* */ - \ /* compute s0 */ - \ /* */ - LONG $0x0f51e3c4; WORD $0x04cc \ // VPALIGNR XMM1,XMM5,XMM4,0x4 /* XTMP1 = W[-15] */ - XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ADDL _xfer+48(FP), R15 \ /* y2 = k + w + S1 + CH */ - MOVL a, R13 \ /* y0 = a */ - ADDL R15, h \ /* h = h + S1 + CH + k + w */ - \ /* ROTATE_ARGS */ - MOVL a, R15 \ /* y2 = a */ - LONG $0xd172e9c5; BYTE $0x07 \ // VPSRLD XMM2,XMM1,0x7 /* */ - ORL c, R13 \ /* y0 = a|c */ - ADDL h, d \ /* d = d + h + S1 + CH + k + w */ - ANDL c, R15 \ /* y2 = a&c */ - LONG $0xf172e1c5; BYTE $0x19 \ // VPSLLD XMM3,XMM1,0x19 /* */ - ANDL b, R13 \ /* y0 = (a|c)&b */ - ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ - LONG $0xdaebe1c5 \ // VPOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, h \ /* h = h + S1 + CH + k + w + S0 + MAJ */ - \ /* ROTATE_ARGS */ - MOVL d, R13 \ /* y0 = e */ - MOVL h, R14 \ /* y1 = a */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - XORL d, R13 \ /* y0 = e ^ (e >> (25-11)) */ - MOVL e, R15 \ /* y2 = f */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - LONG $0xd172e9c5; BYTE $0x12 \ // VPSRLD XMM2,XMM1,0x12 /* */ - XORL h, R14 \ /* y1 = a ^ (a >> (22-13) */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - XORL f, R15 \ /* y2 = f^g */ - LONG $0xd172b9c5; BYTE $0x03 \ // VPSRLD XMM8,XMM1,0x3 /* XTMP4 = W[-15] >> 3 */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - XORL d, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ - ANDL d, R15 \ /* y2 = (f^g)&e */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - LONG $0xf172f1c5; BYTE $0x0e \ // VPSLLD XMM1,XMM1,0xe /* */ - XORL h, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - XORL f, R15 \ /* y2 = CH = ((f^g)&e)^g */ - LONG $0xd9efe1c5 \ // VPXOR XMM3,XMM3,XMM1 /* */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ADDL _xfer+52(FP), R15 \ /* y2 = k + w + S1 + CH */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - LONG $0xdaefe1c5 \ // VPXOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */ - MOVL h, R13 \ /* y0 = a */ - ADDL R15, g \ /* h = h + S1 + CH + k + w */ - MOVL h, R15 \ /* y2 = a */ - LONG $0xef61c1c4; BYTE $0xc8 \ // VPXOR XMM1,XMM3,XMM8 /* XTMP1 = s0 */ - ORL b, R13 \ /* y0 = a|c */ - ADDL g, c \ /* d = d + h + S1 + CH + k + w */ - ANDL b, R15 \ /* y2 = a&c */ - \ /* */ - \ /* compute low s1 */ - \ /* */ - LONG $0xd770f9c5; BYTE $0xfa \ // VPSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ - ANDL a, R13 \ /* y0 = (a|c)&b */ - ADDL R14, g \ /* h = h + S1 + CH + k + w + S0 */ - LONG $0xc1fef9c5 \ // VPADDD XMM0,XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, g \ /* h = h + S1 + CH + k + w + S0 + MAJ */ - \ /* ROTATE_ARGS */ - MOVL c, R13 \ /* y0 = e */ - MOVL g, R14 \ /* y1 = a */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - XORL c, R13 \ /* y0 = e ^ (e >> (25-11)) */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - MOVL d, R15 \ /* y2 = f */ - XORL g, R14 \ /* y1 = a ^ (a >> (22-13) */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - LONG $0xd272b9c5; BYTE $0x0a \ // VPSRLD XMM8,XMM2,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ - XORL e, R15 \ /* y2 = f^g */ - LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ - XORL c, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ - ANDL c, R15 \ /* y2 = (f^g)&e */ - LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - XORL g, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - XORL e, R15 \ /* y2 = CH = ((f^g)&e)^g */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - ADDL _xfer+56(FP), R15 \ /* y2 = k + w + S1 + CH */ - LONG $0xc2ef39c5 \ // VPXOR XMM8,XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ - MOVL g, R13 \ /* y0 = a */ - ADDL R15, f \ /* h = h + S1 + CH + k + w */ - MOVL g, R15 \ /* y2 = a */ - LONG $0x003942c4; BYTE $0xc2 \ // VPSHUFB XMM8,XMM8,XMM10 /* XTMP4 = s1 {00BA} */ - ORL a, R13 \ /* y0 = a|c */ - ADDL f, b \ /* d = d + h + S1 + CH + k + w */ - ANDL a, R15 \ /* y2 = a&c */ - LONG $0xfe79c1c4; BYTE $0xc0 \ // VPADDD XMM0,XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ - ANDL h, R13 \ /* y0 = (a|c)&b */ - ADDL R14, f \ /* h = h + S1 + CH + k + w + S0 */ - \ /* */ - \ /* compute high s1 */ - \ /* */ - LONG $0xd070f9c5; BYTE $0x50 \ // VPSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, f \ /* h = h + S1 + CH + k + w + S0 + MAJ */ - \ /* ROTATE_ARGS */ - MOVL b, R13 \ /* y0 = e */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - MOVL f, R14 \ /* y1 = a */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - XORL b, R13 \ /* y0 = e ^ (e >> (25-11)) */ - MOVL c, R15 \ /* y2 = f */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - LONG $0xd272a1c5; BYTE $0x0a \ // VPSRLD XMM11,XMM2,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ - XORL f, R14 \ /* y1 = a ^ (a >> (22-13) */ - XORL d, R15 \ /* y2 = f^g */ - LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ - XORL b, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ - ANDL b, R15 \ /* y2 = (f^g)&e */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ - XORL f, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - XORL d, R15 \ /* y2 = CH = ((f^g)&e)^g */ - LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ADDL _xfer+60(FP), R15 \ /* y2 = k + w + S1 + CH */ - LONG $0xdaef21c5 \ // VPXOR XMM11,XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ - MOVL f, R13 \ /* y0 = a */ - ADDL R15, e \ /* h = h + S1 + CH + k + w */ - MOVL f, R15 \ /* y2 = a */ - LONG $0x002142c4; BYTE $0xdc \ // VPSHUFB XMM11,XMM11,XMM12 /* XTMP5 = s1 {DC00} */ - ORL h, R13 \ /* y0 = a|c */ - ADDL e, a \ /* d = d + h + S1 + CH + k + w */ - ANDL h, R15 \ /* y2 = a&c */ - LONG $0xe0fea1c5 \ // VPADDD XMM4,XMM11,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ - ANDL g, R13 \ /* y0 = (a|c)&b */ - ADDL R14, e \ /* h = h + S1 + CH + k + w + S0 */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, e \ /* h = h + S1 + CH + k + w + S0 + MAJ */ - \ /* ROTATE_ARGS */ - ROTATE_XS - + MOVL e, R13 \ // y0 = e + ROLL $18, R13 \ // y0 = e >> (25-11) + MOVL a, R14 \ // y1 = a + LONG $0x0f41e3c4; WORD $0x04c6 \ // VPALIGNR XMM0,XMM7,XMM6,0x4 /* XTMP0 = W[-7] */ + ROLL $23, R14 \ // y1 = a >> (22-13) + XORL e, R13 \ // y0 = e ^ (e >> (25-11)) + MOVL f, R15 \ // y2 = f + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + XORL a, R14 \ // y1 = a ^ (a >> (22-13) + XORL g, R15 \ // y2 = f^g + LONG $0xc4fef9c5 \ // VPADDD XMM0,XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ + XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) + ANDL e, R15 \ // y2 = (f^g)&e + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + \ + \ // compute s0 + \ + LONG $0x0f51e3c4; WORD $0x04cc \ // VPALIGNR XMM1,XMM5,XMM4,0x4 /* XTMP1 = W[-15] */ + XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + XORL g, R15 \ // y2 = CH = ((f^g)&e)^g + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + ADDL R13, R15 \ // y2 = S1 + CH + ADDL _xfer+48(FP), R15 \ // y2 = k + w + S1 + CH + MOVL a, R13 \ // y0 = a + ADDL R15, h \ // h = h + S1 + CH + k + w + \ // ROTATE_ARGS + MOVL a, R15 \ // y2 = a + LONG $0xd172e9c5; BYTE $0x07 \ // VPSRLD XMM2,XMM1,0x7 /* */ + ORL c, R13 \ // y0 = a|c + ADDL h, d \ // d = d + h + S1 + CH + k + w + ANDL c, R15 \ // y2 = a&c + LONG $0xf172e1c5; BYTE $0x19 \ // VPSLLD XMM3,XMM1,0x19 /* */ + ANDL b, R13 \ // y0 = (a|c)&b + ADDL R14, h \ // h = h + S1 + CH + k + w + S0 + LONG $0xdaebe1c5 \ // VPOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, h \ // h = h + S1 + CH + k + w + S0 + MAJ + \ // ROTATE_ARGS + MOVL d, R13 \ // y0 = e + MOVL h, R14 \ // y1 = a + ROLL $18, R13 \ // y0 = e >> (25-11) + XORL d, R13 \ // y0 = e ^ (e >> (25-11)) + MOVL e, R15 \ // y2 = f + ROLL $23, R14 \ // y1 = a >> (22-13) + LONG $0xd172e9c5; BYTE $0x12 \ // VPSRLD XMM2,XMM1,0x12 /* */ + XORL h, R14 \ // y1 = a ^ (a >> (22-13) + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + XORL f, R15 \ // y2 = f^g + LONG $0xd172b9c5; BYTE $0x03 \ // VPSRLD XMM8,XMM1,0x3 /* XTMP4 = W[-15] >> 3 */ + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + XORL d, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ANDL d, R15 \ // y2 = (f^g)&e + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + LONG $0xf172f1c5; BYTE $0x0e \ // VPSLLD XMM1,XMM1,0xe /* */ + XORL h, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + XORL f, R15 \ // y2 = CH = ((f^g)&e)^g + LONG $0xd9efe1c5 \ // VPXOR XMM3,XMM3,XMM1 /* */ + ADDL R13, R15 \ // y2 = S1 + CH + ADDL _xfer+52(FP), R15 \ // y2 = k + w + S1 + CH + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + LONG $0xdaefe1c5 \ // VPXOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */ + MOVL h, R13 \ // y0 = a + ADDL R15, g \ // h = h + S1 + CH + k + w + MOVL h, R15 \ // y2 = a + LONG $0xef61c1c4; BYTE $0xc8 \ // VPXOR XMM1,XMM3,XMM8 /* XTMP1 = s0 */ + ORL b, R13 \ // y0 = a|c + ADDL g, c \ // d = d + h + S1 + CH + k + w + ANDL b, R15 \ // y2 = a&c + \ + \ // compute low s1 + \ + LONG $0xd770f9c5; BYTE $0xfa \ // VPSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ + ANDL a, R13 \ // y0 = (a|c)&b + ADDL R14, g \ // h = h + S1 + CH + k + w + S0 + LONG $0xc1fef9c5 \ // VPADDD XMM0,XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, g \ // h = h + S1 + CH + k + w + S0 + MAJ + \ // ROTATE_ARGS + MOVL c, R13 \ // y0 = e + MOVL g, R14 \ // y1 = a + ROLL $18, R13 \ // y0 = e >> (25-11) + XORL c, R13 \ // y0 = e ^ (e >> (25-11)) + ROLL $23, R14 \ // y1 = a >> (22-13) + MOVL d, R15 \ // y2 = f + XORL g, R14 \ // y1 = a ^ (a >> (22-13) + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + LONG $0xd272b9c5; BYTE $0x0a \ // VPSRLD XMM8,XMM2,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ + XORL e, R15 \ // y2 = f^g + LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ + XORL c, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ANDL c, R15 \ // y2 = (f^g)&e + LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + XORL g, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + XORL e, R15 \ // y2 = CH = ((f^g)&e)^g + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ + ADDL R13, R15 \ // y2 = S1 + CH + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + ADDL _xfer+56(FP), R15 \ // y2 = k + w + S1 + CH + LONG $0xc2ef39c5 \ // VPXOR XMM8,XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ + MOVL g, R13 \ // y0 = a + ADDL R15, f \ // h = h + S1 + CH + k + w + MOVL g, R15 \ // y2 = a + LONG $0x003942c4; BYTE $0xc2 \ // VPSHUFB XMM8,XMM8,XMM10 /* XTMP4 = s1 {00BA} */ + ORL a, R13 \ // y0 = a|c + ADDL f, b \ // d = d + h + S1 + CH + k + w + ANDL a, R15 \ // y2 = a&c + LONG $0xfe79c1c4; BYTE $0xc0 \ // VPADDD XMM0,XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ + ANDL h, R13 \ // y0 = (a|c)&b + ADDL R14, f \ // h = h + S1 + CH + k + w + S0 + \ + \ // compute high s1 + \ + LONG $0xd070f9c5; BYTE $0x50 \ // VPSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, f \ // h = h + S1 + CH + k + w + S0 + MAJ + \ // ROTATE_ARGS + MOVL b, R13 \ // y0 = e + ROLL $18, R13 \ // y0 = e >> (25-11) + MOVL f, R14 \ // y1 = a + ROLL $23, R14 \ // y1 = a >> (22-13) + XORL b, R13 \ // y0 = e ^ (e >> (25-11)) + MOVL c, R15 \ // y2 = f + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + LONG $0xd272a1c5; BYTE $0x0a \ // VPSRLD XMM11,XMM2,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ + XORL f, R14 \ // y1 = a ^ (a >> (22-13) + XORL d, R15 \ // y2 = f^g + LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ + XORL b, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ANDL b, R15 \ // y2 = (f^g)&e + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ + XORL f, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + XORL d, R15 \ // y2 = CH = ((f^g)&e)^g + LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + ADDL R13, R15 \ // y2 = S1 + CH + ADDL _xfer+60(FP), R15 \ // y2 = k + w + S1 + CH + LONG $0xdaef21c5 \ // VPXOR XMM11,XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ + MOVL f, R13 \ // y0 = a + ADDL R15, e \ // h = h + S1 + CH + k + w + MOVL f, R15 \ // y2 = a + LONG $0x002142c4; BYTE $0xdc \ // VPSHUFB XMM11,XMM11,XMM12 /* XTMP5 = s1 {DC00} */ + ORL h, R13 \ // y0 = a|c + ADDL e, a \ // d = d + h + S1 + CH + k + w + ANDL h, R15 \ // y2 = a&c + LONG $0xe0fea1c5 \ // VPADDD XMM4,XMM11,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ + ANDL g, R13 \ // y0 = (a|c)&b + ADDL R14, e \ // h = h + S1 + CH + k + w + S0 + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, e \ // h = h + S1 + CH + k + w + S0 + MAJ + \ // ROTATE_ARGS + ROTATE_XS #define DO_ROUND(a, b, c, d, e, f, g, h, offset) \ - MOVL e, R13 \ /* y0 = e */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - MOVL a, R14 \ /* y1 = a */ - XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - MOVL f, R15 \ /* y2 = f */ - XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - XORL g, R15 \ /* y2 = f^g */ - XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - ANDL e, R15 \ /* y2 = (f^g)&e */ - XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - ADDL _xfer+offset(FP), R15 \ /* y2 = k + w + S1 + CH */ - MOVL a, R13 \ /* y0 = a */ - ADDL R15, h \ /* h = h + S1 + CH + k + w */ - MOVL a, R15 \ /* y2 = a */ - ORL c, R13 \ /* y0 = a|c */ - ADDL h, d \ /* d = d + h + S1 + CH + k + w */ - ANDL c, R15 \ /* y2 = a&c */ - ANDL b, R13 \ /* y0 = (a|c)&b */ - ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, h /* h = h + S1 + CH + k + w + S0 + MAJ */ - + MOVL e, R13 \ // y0 = e + ROLL $18, R13 \ // y0 = e >> (25-11) + MOVL a, R14 \ // y1 = a + XORL e, R13 \ // y0 = e ^ (e >> (25-11)) + ROLL $23, R14 \ // y1 = a >> (22-13) + MOVL f, R15 \ // y2 = f + XORL a, R14 \ // y1 = a ^ (a >> (22-13) + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + XORL g, R15 \ // y2 = f^g + XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + ANDL e, R15 \ // y2 = (f^g)&e + XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + XORL g, R15 \ // y2 = CH = ((f^g)&e)^g + ADDL R13, R15 \ // y2 = S1 + CH + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + ADDL _xfer+offset(FP), R15 \ // y2 = k + w + S1 + CH + MOVL a, R13 \ // y0 = a + ADDL R15, h \ // h = h + S1 + CH + k + w + MOVL a, R15 \ // y2 = a + ORL c, R13 \ // y0 = a|c + ADDL h, d \ // d = d + h + S1 + CH + k + w + ANDL c, R15 \ // y2 = a&c + ANDL b, R13 \ // y0 = (a|c)&b + ADDL R14, h \ // h = h + S1 + CH + k + w + S0 + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, h // h = h + S1 + CH + k + w + S0 + MAJ // func blockAvx(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64) -TEXT ·blockAvx(SB), 7, $0 - - MOVQ h+0(FP), SI // SI: &h - MOVQ message+24(FP), R8 // &message - MOVQ lenmessage+32(FP), R9 // length of message - CMPQ R9, $0 - JEQ done_hash - ADDQ R8, R9 - MOVQ R9, _inp_end+64(FP) // store end of message - - // Register definition - // a --> eax - // b --> ebx - // c --> ecx - // d --> r8d - // e --> edx - // f --> r9d - // g --> r10d - // h --> r11d - // - // y0 --> r13d - // y1 --> r14d - // y2 --> r15d - - MOVL (0*4)(SI), AX // a = H0 - MOVL (1*4)(SI), BX // b = H1 - MOVL (2*4)(SI), CX // c = H2 - MOVL (3*4)(SI), R8 // d = H3 - MOVL (4*4)(SI), DX // e = H4 - MOVL (5*4)(SI), R9 // f = H5 - MOVL (6*4)(SI), R10 // g = H6 - MOVL (7*4)(SI), R11 // h = H7 +TEXT ·blockAvx(SB), 7, $0-80 + + MOVQ h+0(FP), SI // SI: &h + MOVQ message_base+24(FP), R8 // &message + MOVQ message_len+32(FP), R9 // length of message + CMPQ R9, $0 + JEQ done_hash + ADDQ R8, R9 + MOVQ R9, reserved2+64(FP) // store end of message + + // Register definition + // a --> eax + // b --> ebx + // c --> ecx + // d --> r8d + // e --> edx + // f --> r9d + // g --> r10d + // h --> r11d + // + // y0 --> r13d + // y1 --> r14d + // y2 --> r15d + + MOVL (0*4)(SI), AX // a = H0 + MOVL (1*4)(SI), BX // b = H1 + MOVL (2*4)(SI), CX // c = H2 + MOVL (3*4)(SI), R8 // d = H3 + MOVL (4*4)(SI), DX // e = H4 + MOVL (5*4)(SI), R9 // f = H5 + MOVL (6*4)(SI), R10 // g = H6 + MOVL (7*4)(SI), R11 // h = H7 MOVOU bflipMask<>(SB), X13 - MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA - MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 - - MOVQ message+24(FP), SI // SI: &message + MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA + MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 + + MOVQ message_base+24(FP), SI // SI: &message loop0: LEAQ constants<>(SB), BP // byte swap first 16 dwords - MOVOU 0*16(SI), X4 - LONG $0x0059c2c4; BYTE $0xe5 // VPSHUFB XMM4, XMM4, XMM13 - MOVOU 1*16(SI), X5 - LONG $0x0051c2c4; BYTE $0xed // VPSHUFB XMM5, XMM5, XMM13 - MOVOU 2*16(SI), X6 - LONG $0x0049c2c4; BYTE $0xf5 // VPSHUFB XMM6, XMM6, XMM13 - MOVOU 3*16(SI), X7 - LONG $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13 - - MOVQ SI, _inp+72(FP) - MOVD $0x3, DI + MOVOU 0*16(SI), X4 + LONG $0x0059c2c4; BYTE $0xe5 // VPSHUFB XMM4, XMM4, XMM13 + MOVOU 1*16(SI), X5 + LONG $0x0051c2c4; BYTE $0xed // VPSHUFB XMM5, XMM5, XMM13 + MOVOU 2*16(SI), X6 + LONG $0x0049c2c4; BYTE $0xf5 // VPSHUFB XMM6, XMM6, XMM13 + MOVOU 3*16(SI), X7 + LONG $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13 + + MOVQ SI, reserved3+72(FP) + MOVD $0x3, DI // schedule 48 input dwords, by doing 3 rounds of 16 each loop1: - LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ - MOVOU X9, _xfer+48(FP) - FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) - - LONG $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP] /* Add 2nd constant to message */ - MOVOU X9, _xfer+48(FP) - FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) - - LONG $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP] /* Add 3rd constant to message */ - MOVOU X9, _xfer+48(FP) - FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) - - LONG $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP] /* Add 4th constant to message */ - MOVOU X9, _xfer+48(FP) - ADDQ $64, BP - FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) - - SUBQ $1, DI - JNE loop1 - - MOVD $0x2, DI + LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ + MOVOU X9, reserved0+48(FP) + FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) + + LONG $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP] /* Add 2nd constant to message */ + MOVOU X9, reserved0+48(FP) + FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) + + LONG $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP] /* Add 3rd constant to message */ + MOVOU X9, reserved0+48(FP) + FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) + + LONG $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP] /* Add 4th constant to message */ + MOVOU X9, reserved0+48(FP) + ADDQ $64, BP + FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) + + SUBQ $1, DI + JNE loop1 + + MOVD $0x2, DI + loop2: - LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ - MOVOU X9, _xfer+48(FP) - DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) - DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) - DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) - DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) - - LONG $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP] /* Add 2nd constant to message */ - MOVOU X9, _xfer+48(FP) - ADDQ $32, BP - DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) - DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) - DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) - DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) - - MOVOU X6, X4 - MOVOU X7, X5 - - SUBQ $1, DI - JNE loop2 - - MOVQ h+0(FP), SI // SI: &h - ADDL (0*4)(SI), AX // H0 = a + H0 - MOVL AX, (0*4)(SI) - ADDL (1*4)(SI), BX // H1 = b + H1 - MOVL BX, (1*4)(SI) - ADDL (2*4)(SI), CX // H2 = c + H2 - MOVL CX, (2*4)(SI) - ADDL (3*4)(SI), R8 // H3 = d + H3 - MOVL R8, (3*4)(SI) - ADDL (4*4)(SI), DX // H4 = e + H4 - MOVL DX, (4*4)(SI) - ADDL (5*4)(SI), R9 // H5 = f + H5 - MOVL R9, (5*4)(SI) - ADDL (6*4)(SI), R10 // H6 = g + H6 - MOVL R10, (6*4)(SI) - ADDL (7*4)(SI), R11 // H7 = h + H7 - MOVL R11, (7*4)(SI) - - MOVQ _inp+72(FP), SI + LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ + MOVOU X9, reserved0+48(FP) + DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) + DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) + DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) + DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) + + LONG $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP] /* Add 2nd constant to message */ + MOVOU X9, reserved0+48(FP) + ADDQ $32, BP + DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) + DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) + DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) + DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) + + MOVOU X6, X4 + MOVOU X7, X5 + + SUBQ $1, DI + JNE loop2 + + MOVQ h+0(FP), SI // SI: &h + ADDL (0*4)(SI), AX // H0 = a + H0 + MOVL AX, (0*4)(SI) + ADDL (1*4)(SI), BX // H1 = b + H1 + MOVL BX, (1*4)(SI) + ADDL (2*4)(SI), CX // H2 = c + H2 + MOVL CX, (2*4)(SI) + ADDL (3*4)(SI), R8 // H3 = d + H3 + MOVL R8, (3*4)(SI) + ADDL (4*4)(SI), DX // H4 = e + H4 + MOVL DX, (4*4)(SI) + ADDL (5*4)(SI), R9 // H5 = f + H5 + MOVL R9, (5*4)(SI) + ADDL (6*4)(SI), R10 // H6 = g + H6 + MOVL R10, (6*4)(SI) + ADDL (7*4)(SI), R11 // H7 = h + H7 + MOVL R11, (7*4)(SI) + + MOVQ reserved3+72(FP), SI ADDQ $64, SI - CMPQ _inp_end+64(FP), SI - JNE loop0 + CMPQ reserved2+64(FP), SI + JNE loop0 done_hash: - RET + RET // Constants table DATA constants<>+0x0(SB)/8, $0x71374491428a2f98 diff --git a/sha256blockSha_amd64.go b/sha256blockSha_amd64.go new file mode 100644 index 0000000..483689e --- /dev/null +++ b/sha256blockSha_amd64.go @@ -0,0 +1,6 @@ +//+build !noasm,!appengine + +package sha256 + +//go:noescape +func blockSha(h *[8]uint32, message []uint8) diff --git a/sha256blockSha_amd64.s b/sha256blockSha_amd64.s new file mode 100644 index 0000000..909fc0e --- /dev/null +++ b/sha256blockSha_amd64.s @@ -0,0 +1,266 @@ +//+build !noasm,!appengine + +// SHA intrinsic version of SHA256 + +// Kristofer Peterson, (C) 2018. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "textflag.h" + +DATA K<>+0x00(SB)/4, $0x428a2f98 +DATA K<>+0x04(SB)/4, $0x71374491 +DATA K<>+0x08(SB)/4, $0xb5c0fbcf +DATA K<>+0x0c(SB)/4, $0xe9b5dba5 +DATA K<>+0x10(SB)/4, $0x3956c25b +DATA K<>+0x14(SB)/4, $0x59f111f1 +DATA K<>+0x18(SB)/4, $0x923f82a4 +DATA K<>+0x1c(SB)/4, $0xab1c5ed5 +DATA K<>+0x20(SB)/4, $0xd807aa98 +DATA K<>+0x24(SB)/4, $0x12835b01 +DATA K<>+0x28(SB)/4, $0x243185be +DATA K<>+0x2c(SB)/4, $0x550c7dc3 +DATA K<>+0x30(SB)/4, $0x72be5d74 +DATA K<>+0x34(SB)/4, $0x80deb1fe +DATA K<>+0x38(SB)/4, $0x9bdc06a7 +DATA K<>+0x3c(SB)/4, $0xc19bf174 +DATA K<>+0x40(SB)/4, $0xe49b69c1 +DATA K<>+0x44(SB)/4, $0xefbe4786 +DATA K<>+0x48(SB)/4, $0x0fc19dc6 +DATA K<>+0x4c(SB)/4, $0x240ca1cc +DATA K<>+0x50(SB)/4, $0x2de92c6f +DATA K<>+0x54(SB)/4, $0x4a7484aa +DATA K<>+0x58(SB)/4, $0x5cb0a9dc +DATA K<>+0x5c(SB)/4, $0x76f988da +DATA K<>+0x60(SB)/4, $0x983e5152 +DATA K<>+0x64(SB)/4, $0xa831c66d +DATA K<>+0x68(SB)/4, $0xb00327c8 +DATA K<>+0x6c(SB)/4, $0xbf597fc7 +DATA K<>+0x70(SB)/4, $0xc6e00bf3 +DATA K<>+0x74(SB)/4, $0xd5a79147 +DATA K<>+0x78(SB)/4, $0x06ca6351 +DATA K<>+0x7c(SB)/4, $0x14292967 +DATA K<>+0x80(SB)/4, $0x27b70a85 +DATA K<>+0x84(SB)/4, $0x2e1b2138 +DATA K<>+0x88(SB)/4, $0x4d2c6dfc +DATA K<>+0x8c(SB)/4, $0x53380d13 +DATA K<>+0x90(SB)/4, $0x650a7354 +DATA K<>+0x94(SB)/4, $0x766a0abb +DATA K<>+0x98(SB)/4, $0x81c2c92e +DATA K<>+0x9c(SB)/4, $0x92722c85 +DATA K<>+0xa0(SB)/4, $0xa2bfe8a1 +DATA K<>+0xa4(SB)/4, $0xa81a664b +DATA K<>+0xa8(SB)/4, $0xc24b8b70 +DATA K<>+0xac(SB)/4, $0xc76c51a3 +DATA K<>+0xb0(SB)/4, $0xd192e819 +DATA K<>+0xb4(SB)/4, $0xd6990624 +DATA K<>+0xb8(SB)/4, $0xf40e3585 +DATA K<>+0xbc(SB)/4, $0x106aa070 +DATA K<>+0xc0(SB)/4, $0x19a4c116 +DATA K<>+0xc4(SB)/4, $0x1e376c08 +DATA K<>+0xc8(SB)/4, $0x2748774c +DATA K<>+0xcc(SB)/4, $0x34b0bcb5 +DATA K<>+0xd0(SB)/4, $0x391c0cb3 +DATA K<>+0xd4(SB)/4, $0x4ed8aa4a +DATA K<>+0xd8(SB)/4, $0x5b9cca4f +DATA K<>+0xdc(SB)/4, $0x682e6ff3 +DATA K<>+0xe0(SB)/4, $0x748f82ee +DATA K<>+0xe4(SB)/4, $0x78a5636f +DATA K<>+0xe8(SB)/4, $0x84c87814 +DATA K<>+0xec(SB)/4, $0x8cc70208 +DATA K<>+0xf0(SB)/4, $0x90befffa +DATA K<>+0xf4(SB)/4, $0xa4506ceb +DATA K<>+0xf8(SB)/4, $0xbef9a3f7 +DATA K<>+0xfc(SB)/4, $0xc67178f2 +GLOBL K<>(SB), RODATA|NOPTR, $256 + +DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203 +DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b +GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16 + +// Register Usage +// BX base address of constant table (constant) +// DX hash_state (constant) +// SI hash_data.data +// DI hash_data.data + hash_data.length - 64 (constant) +// X0 scratch +// X1 scratch +// X2 working hash state // ABEF +// X3 working hash state // CDGH +// X4 first 16 bytes of block +// X5 second 16 bytes of block +// X6 third 16 bytes of block +// X7 fourth 16 bytes of block +// X12 saved hash state // ABEF +// X13 saved hash state // CDGH +// X15 data shuffle mask (constant) + +TEXT ·blockSha(SB), NOSPLIT, $0-32 + MOVQ h+0(FP), DX + MOVQ message_base+8(FP), SI + MOVQ message_len+16(FP), DI + LEAQ -64(SI)(DI*1), DI + MOVOU (DX), X2 + MOVOU 16(DX), X1 + MOVO X2, X3 + PUNPCKLLQ X1, X2 + PUNPCKHLQ X1, X3 + PSHUFD $0x27, X2, X2 + PSHUFD $0x27, X3, X3 + MOVO SHUF_MASK<>(SB), X15 + LEAQ K<>(SB), BX + + JMP TEST + +LOOP: + MOVO X2, X12 + MOVO X3, X13 + + // load block and shuffle + MOVOU (SI), X4 + MOVOU 16(SI), X5 + MOVOU 32(SI), X6 + MOVOU 48(SI), X7 + PSHUFB X15, X4 + PSHUFB X15, X5 + PSHUFB X15, X6 + PSHUFB X15, X7 + +#define ROUND456 \ + PADDL X5, X0 \ + LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 + MOVO X5, X1 \ + LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4 + PADDL X1, X6 \ + LONG $0xf5cd380f \ // SHA256MSG2 XMM6, XMM5 + PSHUFD $0x4e, X0, X0 \ + LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 + LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 + +#define ROUND567 \ + PADDL X6, X0 \ + LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 + MOVO X6, X1 \ + LONG $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4 + PADDL X1, X7 \ + LONG $0xfecd380f \ // SHA256MSG2 XMM7, XMM6 + PSHUFD $0x4e, X0, X0 \ + LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 + LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 + +#define ROUND674 \ + PADDL X7, X0 \ + LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 + MOVO X7, X1 \ + LONG $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4 + PADDL X1, X4 \ + LONG $0xe7cd380f \ // SHA256MSG2 XMM4, XMM7 + PSHUFD $0x4e, X0, X0 \ + LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 + LONG $0xf7cc380f // SHA256MSG1 XMM6, XMM7 + +#define ROUND745 \ + PADDL X4, X0 \ + LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 + MOVO X4, X1 \ + LONG $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4 + PADDL X1, X5 \ + LONG $0xeccd380f \ // SHA256MSG2 XMM5, XMM4 + PSHUFD $0x4e, X0, X0 \ + LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 + LONG $0xfccc380f // SHA256MSG1 XMM7, XMM4 + + // rounds 0-3 + MOVO (BX), X0 + PADDL X4, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + + // rounds 4-7 + MOVO 1*16(BX), X0 + PADDL X5, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 + + // rounds 8-11 + MOVO 2*16(BX), X0 + PADDL X6, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 + + MOVO 3*16(BX), X0; ROUND674 // rounds 12-15 + MOVO 4*16(BX), X0; ROUND745 // rounds 16-19 + MOVO 5*16(BX), X0; ROUND456 // rounds 20-23 + MOVO 6*16(BX), X0; ROUND567 // rounds 24-27 + MOVO 7*16(BX), X0; ROUND674 // rounds 28-31 + MOVO 8*16(BX), X0; ROUND745 // rounds 32-35 + MOVO 9*16(BX), X0; ROUND456 // rounds 36-39 + MOVO 10*16(BX), X0; ROUND567 // rounds 40-43 + MOVO 11*16(BX), X0; ROUND674 // rounds 44-47 + MOVO 12*16(BX), X0; ROUND745 // rounds 48-51 + + // rounds 52-55 + MOVO 13*16(BX), X0 + PADDL X5, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + MOVO X5, X1 + LONG $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4 + PADDL X1, X6 + LONG $0xf5cd380f // SHA256MSG2 XMM6, XMM5 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + + // rounds 56-59 + MOVO 14*16(BX), X0 + PADDL X6, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + MOVO X6, X1 + LONG $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4 + PADDL X1, X7 + LONG $0xfecd380f // SHA256MSG2 XMM7, XMM6 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + + // rounds 60-63 + MOVO 15*16(BX), X0 + PADDL X7, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + + PADDL X12, X2 + PADDL X13, X3 + + ADDQ $64, SI + +TEST: + CMPQ SI, DI + JBE LOOP + + PSHUFD $0x4e, X3, X0 + LONG $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0 + PSHUFD $0x4e, X2, X1 + LONG $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f + PSHUFD $0x1b, X0, X0 + PSHUFD $0x1b, X1, X1 + + MOVOU X0, (DX) + MOVOU X1, 16(DX) + + RET diff --git a/sha256blockSha_amd64_test.go b/sha256blockSha_amd64_test.go new file mode 100644 index 0000000..c43202e --- /dev/null +++ b/sha256blockSha_amd64_test.go @@ -0,0 +1,77 @@ +//+build !noasm,!appengine + +package sha256 + +import ( + "crypto/sha256" + "encoding/binary" + "testing" +) + +func sha256hash(m []byte) (r [32]byte) { + var h [8]uint32 + + h[0] = 0x6a09e667 + h[1] = 0xbb67ae85 + h[2] = 0x3c6ef372 + h[3] = 0xa54ff53a + h[4] = 0x510e527f + h[5] = 0x9b05688c + h[6] = 0x1f83d9ab + h[7] = 0x5be0cd19 + + blockSha(&h, m) + l0 := len(m) + l := l0 & (BlockSize - 1) + m = m[l0-l:] + + var k [64]byte + copy(k[:], m) + + k[l] = 0x80 + + if l >= 56 { + blockSha(&h, k[:]) + binary.LittleEndian.PutUint64(k[0:8], 0) + binary.LittleEndian.PutUint64(k[8:16], 0) + binary.LittleEndian.PutUint64(k[16:24], 0) + binary.LittleEndian.PutUint64(k[24:32], 0) + binary.LittleEndian.PutUint64(k[32:40], 0) + binary.LittleEndian.PutUint64(k[40:48], 0) + binary.LittleEndian.PutUint64(k[48:56], 0) + } + binary.BigEndian.PutUint64(k[56:64], uint64(l0)<<3) + blockSha(&h, k[:]) + + binary.BigEndian.PutUint32(r[0:4], h[0]) + binary.BigEndian.PutUint32(r[4:8], h[1]) + binary.BigEndian.PutUint32(r[8:12], h[2]) + binary.BigEndian.PutUint32(r[12:16], h[3]) + binary.BigEndian.PutUint32(r[16:20], h[4]) + binary.BigEndian.PutUint32(r[20:24], h[5]) + binary.BigEndian.PutUint32(r[24:28], h[6]) + binary.BigEndian.PutUint32(r[28:32], h[7]) + + return +} + +func runTestSha(hashfunc func([]byte) [32]byte) bool { + var m = []byte("This is a message. This is a message. This is a message. This is a message.") + + ar := hashfunc(m) + br := sha256.Sum256(m) + + return ar == br +} + +func TestSha0(t *testing.T) { + if !runTestSha(Sum256) { + t.Errorf("FAILED") + } +} + +func TestSha1(t *testing.T) { + if sha && ssse3 && sse41 && !runTestSha(sha256hash) { + t.Errorf("FAILED") + } +} diff --git a/sha256blockSsse_amd64.go b/sha256blockSsse_amd64.go index 54abbb0..1ae2320 100644 --- a/sha256blockSsse_amd64.go +++ b/sha256blockSsse_amd64.go @@ -1,4 +1,4 @@ -//+build !noasm +//+build !noasm,!appengine /* * Minio Cloud Storage, (C) 2016 Minio, Inc. diff --git a/sha256blockSsse_amd64.s b/sha256blockSsse_amd64.s index cf12484..7afb45c 100644 --- a/sha256blockSsse_amd64.s +++ b/sha256blockSsse_amd64.s @@ -1,4 +1,4 @@ -//+build !noasm !appengine +//+build !noasm,!appengine // SHA256 implementation for SSSE3 @@ -35,351 +35,350 @@ #include "textflag.h" #define ROTATE_XS \ - MOVOU X4, X15 \ - MOVOU X5, X4 \ - MOVOU X6, X5 \ - MOVOU X7, X6 \ - MOVOU X15, X7 + MOVOU X4, X15 \ + MOVOU X5, X4 \ + MOVOU X6, X5 \ + MOVOU X7, X6 \ + MOVOU X15, X7 // compute s0 four at a time and s1 two at a time // compute W[-16] + W[-7] 4 at a time #define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h) \ - MOVL e, R13 \ /* y0 = e */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - MOVL a, R14 \ /* y1 = a */ - MOVOU X7, X0 \ - LONG $0x0f3a0f66; WORD $0x04c6 \ // PALIGNR XMM0,XMM6,0x4 /* XTMP0 = W[-7] */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ - MOVL f, R15 \ /* y2 = f */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ - XORL g, R15 \ /* y2 = f^g */ - LONG $0xc4fe0f66 \ // PADDD XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ - XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) */ - ANDL e, R15 \ /* y2 = (f^g)&e */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - \ /* */ - \ /* compute s0 */ - \ /* */ - MOVOU X5, X1 \ - LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1,XMM4,0x4 /* XTMP1 = W[-15] */ - XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ADDL _xfer+48(FP), R15 \ /* y2 = k + w + S1 + CH */ - MOVL a, R13 \ /* y0 = a */ - ADDL R15, h \ /* h = h + S1 + CH + k + w */ - \ /* ROTATE_ARGS */ - MOVL a, R15 \ /* y2 = a */ - MOVOU X1, X2 \ - LONG $0xd2720f66; BYTE $0x07 \ // PSRLD XMM2,0x7 /* */ - ORL c, R13 \ /* y0 = a|c */ - ADDL h, d \ /* d = d + h + S1 + CH + k + w */ - ANDL c, R15 \ /* y2 = a&c */ - MOVOU X1, X3 \ - LONG $0xf3720f66; BYTE $0x19 \ // PSLLD XMM3,0x19 /* */ - ANDL b, R13 \ /* y0 = (a|c)&b */ - ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ - LONG $0xdaeb0f66 \ // POR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, h \ /* h = h + S1 + CH + k + w + S0 + MAJ */ - \ /* ROTATE_ARGS */ - MOVL d, R13 \ /* y0 = e */ - MOVL h, R14 \ /* y1 = a */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - XORL d, R13 \ /* y0 = e ^ (e >> (25-11)) */ - MOVL e, R15 \ /* y2 = f */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - MOVOU X1, X2 \ - LONG $0xd2720f66; BYTE $0x12 \ // PSRLD XMM2,0x12 /* */ - XORL h, R14 \ /* y1 = a ^ (a >> (22-13) */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - XORL f, R15 \ /* y2 = f^g */ - MOVOU X1, X8 \ - LONG $0x720f4166; WORD $0x03d0 \ // PSRLD XMM8,0x3 /* XTMP4 = W[-15] >> 3 */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - XORL d, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ - ANDL d, R15 \ /* y2 = (f^g)&e */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - LONG $0xf1720f66; BYTE $0x0e \ // PSLLD XMM1,0xe /* */ - XORL h, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - XORL f, R15 \ /* y2 = CH = ((f^g)&e)^g */ - LONG $0xd9ef0f66 \ // PXOR XMM3,XMM1 /* */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ADDL _xfer+52(FP), R15 \ /* y2 = k + w + S1 + CH */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - LONG $0xdaef0f66 \ // PXOR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */ - MOVL h, R13 \ /* y0 = a */ - ADDL R15, g \ /* h = h + S1 + CH + k + w */ - MOVL h, R15 \ /* y2 = a */ - MOVOU X3, X1 \ - LONG $0xef0f4166; BYTE $0xc8 \ // PXOR XMM1,XMM8 /* XTMP1 = s0 */ - ORL b, R13 \ /* y0 = a|c */ - ADDL g, c \ /* d = d + h + S1 + CH + k + w */ - ANDL b, R15 \ /* y2 = a&c */ - \ /* */ - \ /* compute low s1 */ - \ /* */ - LONG $0xd7700f66; BYTE $0xfa \ // PSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ - ANDL a, R13 \ /* y0 = (a|c)&b */ - ADDL R14, g \ /* h = h + S1 + CH + k + w + S0 */ - LONG $0xc1fe0f66 \ // PADDD XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, g \ /* h = h + S1 + CH + k + w + S0 + MAJ */ - \ /* ROTATE_ARGS */ - MOVL c, R13 \ /* y0 = e */ - MOVL g, R14 \ /* y1 = a */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - XORL c, R13 \ /* y0 = e ^ (e >> (25-11)) */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - MOVL d, R15 \ /* y2 = f */ - XORL g, R14 \ /* y1 = a ^ (a >> (22-13) */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - MOVOU X2, X8 \ - LONG $0x720f4166; WORD $0x0ad0 \ // PSRLD XMM8,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ - XORL e, R15 \ /* y2 = f^g */ - MOVOU X2, X3 \ - LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ - XORL c, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ - ANDL c, R15 \ /* y2 = (f^g)&e */ - LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - XORL g, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - XORL e, R15 \ /* y2 = CH = ((f^g)&e)^g */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - ADDL _xfer+56(FP), R15 \ /* y2 = k + w + S1 + CH */ - LONG $0xef0f4466; BYTE $0xc2 \ // PXOR XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ - MOVL g, R13 \ /* y0 = a */ - ADDL R15, f \ /* h = h + S1 + CH + k + w */ - MOVL g, R15 \ /* y2 = a */ - LONG $0x380f4566; WORD $0xc200 \ // PSHUFB XMM8,XMM10 /* XTMP4 = s1 {00BA} */ - ORL a, R13 \ /* y0 = a|c */ - ADDL f, b \ /* d = d + h + S1 + CH + k + w */ - ANDL a, R15 \ /* y2 = a&c */ - LONG $0xfe0f4166; BYTE $0xc0 \ // PADDD XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ - ANDL h, R13 \ /* y0 = (a|c)&b */ - ADDL R14, f \ /* h = h + S1 + CH + k + w + S0 */ - \ /* */ - \ /* compute high s1 */ - \ /* */ - LONG $0xd0700f66; BYTE $0x50 \ // PSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, f \ /* h = h + S1 + CH + k + w + S0 + MAJ */ - \ /* ROTATE_ARGS */ - MOVL b, R13 \ /* y0 = e */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - MOVL f, R14 \ /* y1 = a */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - XORL b, R13 \ /* y0 = e ^ (e >> (25-11)) */ - MOVL c, R15 \ /* y2 = f */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - MOVOU X2, X11 \ - LONG $0x720f4166; WORD $0x0ad3 \ // PSRLD XMM11,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ - XORL f, R14 \ /* y1 = a ^ (a >> (22-13) */ - XORL d, R15 \ /* y2 = f^g */ - MOVOU X2, X3 \ - LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ - XORL b, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ - ANDL b, R15 \ /* y2 = (f^g)&e */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ - XORL f, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - XORL d, R15 \ /* y2 = CH = ((f^g)&e)^g */ - LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ADDL _xfer+60(FP), R15 \ /* y2 = k + w + S1 + CH */ - LONG $0xef0f4466; BYTE $0xda \ // PXOR XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ - MOVL f, R13 \ /* y0 = a */ - ADDL R15, e \ /* h = h + S1 + CH + k + w */ - MOVL f, R15 \ /* y2 = a */ - LONG $0x380f4566; WORD $0xdc00 \ // PSHUFB XMM11,XMM12 /* XTMP5 = s1 {DC00} */ - ORL h, R13 \ /* y0 = a|c */ - ADDL e, a \ /* d = d + h + S1 + CH + k + w */ - ANDL h, R15 \ /* y2 = a&c */ - MOVOU X11, X4 \ - LONG $0xe0fe0f66 \ // PADDD XMM4,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ - ANDL g, R13 \ /* y0 = (a|c)&b */ - ADDL R14, e \ /* h = h + S1 + CH + k + w + S0 */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, e \ /* h = h + S1 + CH + k + w + S0 + MAJ */ - \ /* ROTATE_ARGS */ - ROTATE_XS - + MOVL e, R13 \ // y0 = e + ROLL $18, R13 \ // y0 = e >> (25-11) + MOVL a, R14 \ // y1 = a + MOVOU X7, X0 \ + LONG $0x0f3a0f66; WORD $0x04c6 \ // PALIGNR XMM0,XMM6,0x4 /* XTMP0 = W[-7] */ + ROLL $23, R14 \ // y1 = a >> (22-13) + XORL e, R13 \ // y0 = e ^ (e >> (25-11)) + MOVL f, R15 \ // y2 = f + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + XORL a, R14 \ // y1 = a ^ (a >> (22-13) + XORL g, R15 \ // y2 = f^g + LONG $0xc4fe0f66 \ // PADDD XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ + XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) + ANDL e, R15 \ // y2 = (f^g)&e + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + \ + \ // compute s0 + \ + MOVOU X5, X1 \ + LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1,XMM4,0x4 /* XTMP1 = W[-15] */ + XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + XORL g, R15 \ // y2 = CH = ((f^g)&e)^g + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + ADDL R13, R15 \ // y2 = S1 + CH + ADDL _xfer+48(FP), R15 \ // y2 = k + w + S1 + CH + MOVL a, R13 \ // y0 = a + ADDL R15, h \ // h = h + S1 + CH + k + w + \ // ROTATE_ARGS + MOVL a, R15 \ // y2 = a + MOVOU X1, X2 \ + LONG $0xd2720f66; BYTE $0x07 \ // PSRLD XMM2,0x7 /* */ + ORL c, R13 \ // y0 = a|c + ADDL h, d \ // d = d + h + S1 + CH + k + w + ANDL c, R15 \ // y2 = a&c + MOVOU X1, X3 \ + LONG $0xf3720f66; BYTE $0x19 \ // PSLLD XMM3,0x19 /* */ + ANDL b, R13 \ // y0 = (a|c)&b + ADDL R14, h \ // h = h + S1 + CH + k + w + S0 + LONG $0xdaeb0f66 \ // POR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, h \ // h = h + S1 + CH + k + w + S0 + MAJ + \ // ROTATE_ARGS + MOVL d, R13 \ // y0 = e + MOVL h, R14 \ // y1 = a + ROLL $18, R13 \ // y0 = e >> (25-11) + XORL d, R13 \ // y0 = e ^ (e >> (25-11)) + MOVL e, R15 \ // y2 = f + ROLL $23, R14 \ // y1 = a >> (22-13) + MOVOU X1, X2 \ + LONG $0xd2720f66; BYTE $0x12 \ // PSRLD XMM2,0x12 /* */ + XORL h, R14 \ // y1 = a ^ (a >> (22-13) + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + XORL f, R15 \ // y2 = f^g + MOVOU X1, X8 \ + LONG $0x720f4166; WORD $0x03d0 \ // PSRLD XMM8,0x3 /* XTMP4 = W[-15] >> 3 */ + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + XORL d, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ANDL d, R15 \ // y2 = (f^g)&e + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + LONG $0xf1720f66; BYTE $0x0e \ // PSLLD XMM1,0xe /* */ + XORL h, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + XORL f, R15 \ // y2 = CH = ((f^g)&e)^g + LONG $0xd9ef0f66 \ // PXOR XMM3,XMM1 /* */ + ADDL R13, R15 \ // y2 = S1 + CH + ADDL _xfer+52(FP), R15 \ // y2 = k + w + S1 + CH + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + LONG $0xdaef0f66 \ // PXOR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */ + MOVL h, R13 \ // y0 = a + ADDL R15, g \ // h = h + S1 + CH + k + w + MOVL h, R15 \ // y2 = a + MOVOU X3, X1 \ + LONG $0xef0f4166; BYTE $0xc8 \ // PXOR XMM1,XMM8 /* XTMP1 = s0 */ + ORL b, R13 \ // y0 = a|c + ADDL g, c \ // d = d + h + S1 + CH + k + w + ANDL b, R15 \ // y2 = a&c + \ + \ // compute low s1 + \ + LONG $0xd7700f66; BYTE $0xfa \ // PSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ + ANDL a, R13 \ // y0 = (a|c)&b + ADDL R14, g \ // h = h + S1 + CH + k + w + S0 + LONG $0xc1fe0f66 \ // PADDD XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, g \ // h = h + S1 + CH + k + w + S0 + MAJ + \ // ROTATE_ARGS + MOVL c, R13 \ // y0 = e + MOVL g, R14 \ // y1 = a + ROLL $18, R13 \ // y0 = e >> (25-11) + XORL c, R13 \ // y0 = e ^ (e >> (25-11)) + ROLL $23, R14 \ // y1 = a >> (22-13) + MOVL d, R15 \ // y2 = f + XORL g, R14 \ // y1 = a ^ (a >> (22-13) + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + MOVOU X2, X8 \ + LONG $0x720f4166; WORD $0x0ad0 \ // PSRLD XMM8,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ + XORL e, R15 \ // y2 = f^g + MOVOU X2, X3 \ + LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ + XORL c, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ANDL c, R15 \ // y2 = (f^g)&e + LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + XORL g, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + XORL e, R15 \ // y2 = CH = ((f^g)&e)^g + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ + ADDL R13, R15 \ // y2 = S1 + CH + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + ADDL _xfer+56(FP), R15 \ // y2 = k + w + S1 + CH + LONG $0xef0f4466; BYTE $0xc2 \ // PXOR XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ + MOVL g, R13 \ // y0 = a + ADDL R15, f \ // h = h + S1 + CH + k + w + MOVL g, R15 \ // y2 = a + LONG $0x380f4566; WORD $0xc200 \ // PSHUFB XMM8,XMM10 /* XTMP4 = s1 {00BA} */ + ORL a, R13 \ // y0 = a|c + ADDL f, b \ // d = d + h + S1 + CH + k + w + ANDL a, R15 \ // y2 = a&c + LONG $0xfe0f4166; BYTE $0xc0 \ // PADDD XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ + ANDL h, R13 \ // y0 = (a|c)&b + ADDL R14, f \ // h = h + S1 + CH + k + w + S0 + \ + \ // compute high s1 + \ + LONG $0xd0700f66; BYTE $0x50 \ // PSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, f \ // h = h + S1 + CH + k + w + S0 + MAJ + \ // ROTATE_ARGS + MOVL b, R13 \ // y0 = e + ROLL $18, R13 \ // y0 = e >> (25-11) + MOVL f, R14 \ // y1 = a + ROLL $23, R14 \ // y1 = a >> (22-13) + XORL b, R13 \ // y0 = e ^ (e >> (25-11)) + MOVL c, R15 \ // y2 = f + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + MOVOU X2, X11 \ + LONG $0x720f4166; WORD $0x0ad3 \ // PSRLD XMM11,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ + XORL f, R14 \ // y1 = a ^ (a >> (22-13) + XORL d, R15 \ // y2 = f^g + MOVOU X2, X3 \ + LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ + XORL b, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ANDL b, R15 \ // y2 = (f^g)&e + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ + XORL f, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + XORL d, R15 \ // y2 = CH = ((f^g)&e)^g + LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + ADDL R13, R15 \ // y2 = S1 + CH + ADDL _xfer+60(FP), R15 \ // y2 = k + w + S1 + CH + LONG $0xef0f4466; BYTE $0xda \ // PXOR XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ + MOVL f, R13 \ // y0 = a + ADDL R15, e \ // h = h + S1 + CH + k + w + MOVL f, R15 \ // y2 = a + LONG $0x380f4566; WORD $0xdc00 \ // PSHUFB XMM11,XMM12 /* XTMP5 = s1 {DC00} */ + ORL h, R13 \ // y0 = a|c + ADDL e, a \ // d = d + h + S1 + CH + k + w + ANDL h, R15 \ // y2 = a&c + MOVOU X11, X4 \ + LONG $0xe0fe0f66 \ // PADDD XMM4,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ + ANDL g, R13 \ // y0 = (a|c)&b + ADDL R14, e \ // h = h + S1 + CH + k + w + S0 + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, e \ // h = h + S1 + CH + k + w + S0 + MAJ + \ // ROTATE_ARGS + ROTATE_XS #define DO_ROUND(a, b, c, d, e, f, g, h, offset) \ - MOVL e, R13 \ /* y0 = e */ - ROLL $18, R13 \ /* y0 = e >> (25-11) */ - MOVL a, R14 \ /* y1 = a */ - XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ - ROLL $23, R14 \ /* y1 = a >> (22-13) */ - MOVL f, R15 \ /* y2 = f */ - XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ - ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ - XORL g, R15 \ /* y2 = f^g */ - XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ - ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ - ANDL e, R15 \ /* y2 = (f^g)&e */ - XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ - ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ - XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ - ADDL R13, R15 \ /* y2 = S1 + CH */ - ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ - ADDL _xfer+offset(FP), R15 \ /* y2 = k + w + S1 + CH */ - MOVL a, R13 \ /* y0 = a */ - ADDL R15, h \ /* h = h + S1 + CH + k + w */ - MOVL a, R15 \ /* y2 = a */ - ORL c, R13 \ /* y0 = a|c */ - ADDL h, d \ /* d = d + h + S1 + CH + k + w */ - ANDL c, R15 \ /* y2 = a&c */ - ANDL b, R13 \ /* y0 = (a|c)&b */ - ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ - ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ - ADDL R13, h /* h = h + S1 + CH + k + w + S0 + MAJ */ - + MOVL e, R13 \ // y0 = e + ROLL $18, R13 \ // y0 = e >> (25-11) + MOVL a, R14 \ // y1 = a + XORL e, R13 \ // y0 = e ^ (e >> (25-11)) + ROLL $23, R14 \ // y1 = a >> (22-13) + MOVL f, R15 \ // y2 = f + XORL a, R14 \ // y1 = a ^ (a >> (22-13) + ROLL $27, R13 \ // y0 = (e >> (11-6)) ^ (e >> (25-6)) + XORL g, R15 \ // y2 = f^g + XORL e, R13 \ // y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) + ROLL $21, R14 \ // y1 = (a >> (13-2)) ^ (a >> (22-2)) + ANDL e, R15 \ // y2 = (f^g)&e + XORL a, R14 \ // y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) + ROLL $26, R13 \ // y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) + XORL g, R15 \ // y2 = CH = ((f^g)&e)^g + ADDL R13, R15 \ // y2 = S1 + CH + ROLL $30, R14 \ // y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) + ADDL _xfer+offset(FP), R15 \ // y2 = k + w + S1 + CH + MOVL a, R13 \ // y0 = a + ADDL R15, h \ // h = h + S1 + CH + k + w + MOVL a, R15 \ // y2 = a + ORL c, R13 \ // y0 = a|c + ADDL h, d \ // d = d + h + S1 + CH + k + w + ANDL c, R15 \ // y2 = a&c + ANDL b, R13 \ // y0 = (a|c)&b + ADDL R14, h \ // h = h + S1 + CH + k + w + S0 + ORL R15, R13 \ // y0 = MAJ = (a|c)&b)|(a&c) + ADDL R13, h // h = h + S1 + CH + k + w + S0 + MAJ // func blockSsse(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64) -TEXT ·blockSsse(SB), 7, $0 - - MOVQ h+0(FP), SI // SI: &h - MOVQ message+24(FP), R8 // &message - MOVQ lenmessage+32(FP), R9 // length of message - CMPQ R9, $0 - JEQ done_hash - ADDQ R8, R9 - MOVQ R9, _inp_end+64(FP) // store end of message - - // Register definition - // a --> eax - // b --> ebx - // c --> ecx - // d --> r8d - // e --> edx - // f --> r9d - // g --> r10d - // h --> r11d - // - // y0 --> r13d - // y1 --> r14d - // y2 --> r15d - - MOVL (0*4)(SI), AX // a = H0 - MOVL (1*4)(SI), BX // b = H1 - MOVL (2*4)(SI), CX // c = H2 - MOVL (3*4)(SI), R8 // d = H3 - MOVL (4*4)(SI), DX // e = H4 - MOVL (5*4)(SI), R9 // f = H5 - MOVL (6*4)(SI), R10 // g = H6 - MOVL (7*4)(SI), R11 // h = H7 +TEXT ·blockSsse(SB), 7, $0-80 + + MOVQ h+0(FP), SI // SI: &h + MOVQ message_base+24(FP), R8 // &message + MOVQ message_len+32(FP), R9 // length of message + CMPQ R9, $0 + JEQ done_hash + ADDQ R8, R9 + MOVQ R9, reserved2+64(FP) // store end of message + + // Register definition + // a --> eax + // b --> ebx + // c --> ecx + // d --> r8d + // e --> edx + // f --> r9d + // g --> r10d + // h --> r11d + // + // y0 --> r13d + // y1 --> r14d + // y2 --> r15d + + MOVL (0*4)(SI), AX // a = H0 + MOVL (1*4)(SI), BX // b = H1 + MOVL (2*4)(SI), CX // c = H2 + MOVL (3*4)(SI), R8 // d = H3 + MOVL (4*4)(SI), DX // e = H4 + MOVL (5*4)(SI), R9 // f = H5 + MOVL (6*4)(SI), R10 // g = H6 + MOVL (7*4)(SI), R11 // h = H7 MOVOU bflipMask<>(SB), X13 - MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA - MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 - - MOVQ message+24(FP), SI // SI: &message + MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA + MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 + + MOVQ message_base+24(FP), SI // SI: &message loop0: LEAQ constants<>(SB), BP // byte swap first 16 dwords - MOVOU 0*16(SI), X4 - LONG $0x380f4166; WORD $0xe500 // PSHUFB XMM4, XMM13 - MOVOU 1*16(SI), X5 - LONG $0x380f4166; WORD $0xed00 // PSHUFB XMM5, XMM13 - MOVOU 2*16(SI), X6 - LONG $0x380f4166; WORD $0xf500 // PSHUFB XMM6, XMM13 - MOVOU 3*16(SI), X7 - LONG $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13 - - MOVQ SI, _inp+72(FP) - MOVD $0x3, DI - - // Align - // nop WORD PTR [rax+rax*1+0x0] + MOVOU 0*16(SI), X4 + LONG $0x380f4166; WORD $0xe500 // PSHUFB XMM4, XMM13 + MOVOU 1*16(SI), X5 + LONG $0x380f4166; WORD $0xed00 // PSHUFB XMM5, XMM13 + MOVOU 2*16(SI), X6 + LONG $0x380f4166; WORD $0xf500 // PSHUFB XMM6, XMM13 + MOVOU 3*16(SI), X7 + LONG $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13 + + MOVQ SI, reserved3+72(FP) + MOVD $0x3, DI + + // Align + // nop WORD PTR [rax+rax*1+0x0] // schedule 48 input dwords, by doing 3 rounds of 16 each loop1: - MOVOU X4, X9 - LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ - MOVOU X9, _xfer+48(FP) - FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) - - MOVOU X4, X9 - LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ - MOVOU X9, _xfer+48(FP) - FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) - - MOVOU X4, X9 - LONG $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP] /* Add 3rd constant to message */ - MOVOU X9, _xfer+48(FP) - FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) - - MOVOU X4, X9 - LONG $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP] /* Add 4th constant to message */ - MOVOU X9, _xfer+48(FP) - ADDQ $64, BP - FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) - - SUBQ $1, DI - JNE loop1 - - MOVD $0x2, DI + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ + MOVOU X9, reserved0+48(FP) + FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) + + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ + MOVOU X9, reserved0+48(FP) + FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) + + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP] /* Add 3rd constant to message */ + MOVOU X9, reserved0+48(FP) + FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) + + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP] /* Add 4th constant to message */ + MOVOU X9, reserved0+48(FP) + ADDQ $64, BP + FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) + + SUBQ $1, DI + JNE loop1 + + MOVD $0x2, DI + loop2: - MOVOU X4, X9 - LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ - MOVOU X9, _xfer+48(FP) - DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) - DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) - DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) - DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) - - MOVOU X5, X9 - LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ - MOVOU X9, _xfer+48(FP) - ADDQ $32, BP - DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) - DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) - DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) - DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) - - MOVOU X6, X4 - MOVOU X7, X5 - - SUBQ $1, DI - JNE loop2 - - MOVQ h+0(FP), SI // SI: &h - ADDL (0*4)(SI), AX // H0 = a + H0 - MOVL AX, (0*4)(SI) - ADDL (1*4)(SI), BX // H1 = b + H1 - MOVL BX, (1*4)(SI) - ADDL (2*4)(SI), CX // H2 = c + H2 - MOVL CX, (2*4)(SI) - ADDL (3*4)(SI), R8 // H3 = d + H3 - MOVL R8, (3*4)(SI) - ADDL (4*4)(SI), DX // H4 = e + H4 - MOVL DX, (4*4)(SI) - ADDL (5*4)(SI), R9 // H5 = f + H5 - MOVL R9, (5*4)(SI) - ADDL (6*4)(SI), R10 // H6 = g + H6 - MOVL R10, (6*4)(SI) - ADDL (7*4)(SI), R11 // H7 = h + H7 - MOVL R11, (7*4)(SI) - - MOVQ _inp+72(FP), SI + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ + MOVOU X9, reserved0+48(FP) + DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) + DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) + DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) + DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) + + MOVOU X5, X9 + LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ + MOVOU X9, reserved0+48(FP) + ADDQ $32, BP + DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) + DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) + DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) + DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) + + MOVOU X6, X4 + MOVOU X7, X5 + + SUBQ $1, DI + JNE loop2 + + MOVQ h+0(FP), SI // SI: &h + ADDL (0*4)(SI), AX // H0 = a + H0 + MOVL AX, (0*4)(SI) + ADDL (1*4)(SI), BX // H1 = b + H1 + MOVL BX, (1*4)(SI) + ADDL (2*4)(SI), CX // H2 = c + H2 + MOVL CX, (2*4)(SI) + ADDL (3*4)(SI), R8 // H3 = d + H3 + MOVL R8, (3*4)(SI) + ADDL (4*4)(SI), DX // H4 = e + H4 + MOVL DX, (4*4)(SI) + ADDL (5*4)(SI), R9 // H5 = f + H5 + MOVL R9, (5*4)(SI) + ADDL (6*4)(SI), R10 // H6 = g + H6 + MOVL R10, (6*4)(SI) + ADDL (7*4)(SI), R11 // H7 = h + H7 + MOVL R11, (7*4)(SI) + + MOVQ reserved3+72(FP), SI ADDQ $64, SI - CMPQ _inp_end+64(FP), SI - JNE loop0 + CMPQ reserved2+64(FP), SI + JNE loop0 done_hash: - RET + RET // Constants table DATA constants<>+0x0(SB)/8, $0x71374491428a2f98 diff --git a/sha256block_386.go b/sha256block_386.go deleted file mode 100644 index 84b54ae..0000000 --- a/sha256block_386.go +++ /dev/null @@ -1,24 +0,0 @@ -//+build !noasm - -/* - * Minio Cloud Storage, (C) 2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package sha256 - -func blockArmGo(dig *digest, p []byte) {} -func blockAvx2Go(dig *digest, p []byte) {} -func blockAvxGo(dig *digest, p []byte) {} -func blockSsseGo(dig *digest, p []byte) {} diff --git a/sha256block_amd64.go b/sha256block_amd64.go index b6db61e..1c4d97f 100644 --- a/sha256block_amd64.go +++ b/sha256block_amd64.go @@ -1,4 +1,4 @@ -//+build !noasm +//+build !noasm,!appengine /* * Minio Cloud Storage, (C) 2016 Minio, Inc. @@ -46,3 +46,8 @@ dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] } + +func blockShaGo(dig *digest, p []byte) { + + blockSha(&dig.h, p) +} diff --git a/sha256block_arm.go b/sha256block_arm.go deleted file mode 100644 index d892504..0000000 --- a/sha256block_arm.go +++ /dev/null @@ -1,24 +0,0 @@ -//+build !noasm - -/* - * Minio Cloud Storage, (C) 2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package sha256 - -func blockAvx2Go(dig *digest, p []byte) {} -func blockAvxGo(dig *digest, p []byte) {} -func blockSsseGo(dig *digest, p []byte) {} -func blockArmGo(dig *digest, p []byte) {} diff --git a/sha256block_arm64.go b/sha256block_arm64.go index 299cf33..0979c20 100644 --- a/sha256block_arm64.go +++ b/sha256block_arm64.go @@ -1,4 +1,4 @@ -//+build !noasm +//+build !noasm,!appengine /* * Minio Cloud Storage, (C) 2016 Minio, Inc. @@ -21,6 +21,7 @@ func blockAvx2Go(dig *digest, p []byte) {} func blockAvxGo(dig *digest, p []byte) {} func blockSsseGo(dig *digest, p []byte) {} +func blockShaGo(dig *digest, p []byte) {} //go:noescape func blockArm(h []uint32, message []uint8) diff --git a/sha256block_arm64.s b/sha256block_arm64.s index 104ce11..c6ddb37 100644 --- a/sha256block_arm64.s +++ b/sha256block_arm64.s @@ -1,4 +1,4 @@ -//+build !noasm !appengine +//+build !noasm,!appengine // ARM64 version of SHA256 @@ -28,7 +28,7 @@ TEXT ·blockArm(SB), 7, $0 MOVD h+0(FP), R0 MOVD message+24(FP), R1 - MOVD lenmessage+32(FP), R2 // length of message + MOVD message_len+32(FP), R2 // length of message SUBS $64, R2 BMI complete @@ -154,7 +154,6 @@ complete: RET - // Constants table DATA ·constants+0x0(SB)/8, $0x71374491428a2f98 DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf diff --git a/sha256block_noasm.go b/sha256block_noasm.go deleted file mode 100644 index 356ca53..0000000 --- a/sha256block_noasm.go +++ /dev/null @@ -1,136 +0,0 @@ -//+build !arm64 !amd64 noasm appengine - -/* - * Minio Cloud Storage, (C) 2016 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package sha256 - -func blockGeneric(dig *digest, p []byte) { - var w [64]uint32 - h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] - for len(p) >= chunk { - // Can interlace the computation of w with the - // rounds below if needed for speed. - for i := 0; i < 16; i++ { - j := i * 4 - w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) - } - for i := 16; i < 64; i++ { - v1 := w[i-2] - t1 := (v1>>17 | v1<<(32-17)) ^ (v1>>19 | v1<<(32-19)) ^ (v1 >> 10) - v2 := w[i-15] - t2 := (v2>>7 | v2<<(32-7)) ^ (v2>>18 | v2<<(32-18)) ^ (v2 >> 3) - w[i] = t1 + w[i-7] + t2 + w[i-16] - } - - a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7 - - for i := 0; i < 64; i++ { - t1 := h + ((e>>6 | e<<(32-6)) ^ (e>>11 | e<<(32-11)) ^ (e>>25 | e<<(32-25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i] - - t2 := ((a>>2 | a<<(32-2)) ^ (a>>13 | a<<(32-13)) ^ (a>>22 | a<<(32-22))) + ((a & b) ^ (a & c) ^ (b & c)) - - h = g - g = f - f = e - e = d + t1 - d = c - c = b - b = a - a = t1 + t2 - } - - h0 += a - h1 += b - h2 += c - h3 += d - h4 += e - h5 += f - h6 += g - h7 += h - - p = p[chunk:] - } - - dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 -} - -var _K = []uint32{ - 0x428a2f98, - 0x71374491, - 0xb5c0fbcf, - 0xe9b5dba5, - 0x3956c25b, - 0x59f111f1, - 0x923f82a4, - 0xab1c5ed5, - 0xd807aa98, - 0x12835b01, - 0x243185be, - 0x550c7dc3, - 0x72be5d74, - 0x80deb1fe, - 0x9bdc06a7, - 0xc19bf174, - 0xe49b69c1, - 0xefbe4786, - 0x0fc19dc6, - 0x240ca1cc, - 0x2de92c6f, - 0x4a7484aa, - 0x5cb0a9dc, - 0x76f988da, - 0x983e5152, - 0xa831c66d, - 0xb00327c8, - 0xbf597fc7, - 0xc6e00bf3, - 0xd5a79147, - 0x06ca6351, - 0x14292967, - 0x27b70a85, - 0x2e1b2138, - 0x4d2c6dfc, - 0x53380d13, - 0x650a7354, - 0x766a0abb, - 0x81c2c92e, - 0x92722c85, - 0xa2bfe8a1, - 0xa81a664b, - 0xc24b8b70, - 0xc76c51a3, - 0xd192e819, - 0xd6990624, - 0xf40e3585, - 0x106aa070, - 0x19a4c116, - 0x1e376c08, - 0x2748774c, - 0x34b0bcb5, - 0x391c0cb3, - 0x4ed8aa4a, - 0x5b9cca4f, - 0x682e6ff3, - 0x748f82ee, - 0x78a5636f, - 0x84c87814, - 0x8cc70208, - 0x90befffa, - 0xa4506ceb, - 0xbef9a3f7, - 0xc67178f2, -} diff --git a/sha256block_other.go b/sha256block_other.go index 38e7403..0187c95 100644 --- a/sha256block_other.go +++ b/sha256block_other.go @@ -1,23 +1,25 @@ -// Minio Cloud Storage, (C) 2016 Minio, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// +//+build appengine noasm !amd64,!arm64 -// +build ppc64 ppc64le mips mipsle mips64 mips64le s390x +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package sha256 func blockAvx2Go(dig *digest, p []byte) {} func blockAvxGo(dig *digest, p []byte) {} func blockSsseGo(dig *digest, p []byte) {} +func blockShaGo(dig *digest, p []byte) {} func blockArmGo(dig *digest, p []byte) {} diff --git a/test-architectures.sh b/test-architectures.sh new file mode 100755 index 0000000..50150ea --- /dev/null +++ b/test-architectures.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +go tool dist list | while IFS=/ read os arch; do + echo "Checking $os/$arch..." + echo " normal" + GOARCH=$arch GOOS=$os go build -o /dev/null ./... + echo " noasm" + GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null ./... + echo " appengine" + GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null ./... + echo " noasm,appengine" + GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null ./... +done