New Upstream Release - golang-siphash-dev
Ready changes
Summary
Merged new upstream version: 1.2.3 (was: 1.0.0).
Diff
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0e259d4
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,121 @@
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+ HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display,
+ communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+ likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+ v. rights protecting the extraction, dissemination, use and reuse of data
+ in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation
+ thereof, including any amended or successor version of such
+ directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+ world based on applicable law or treaty, and any national
+ implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+ warranties of any kind concerning the Work, express, implied,
+ statutory or otherwise, including without limitation warranties of
+ title, merchantability, fitness for a particular purpose, non
+ infringement, or the absence of latent or other defects, accuracy, or
+ the present or absence of errors, whether or not discoverable, all to
+ the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without
+ limitation any person's Copyright and Related Rights in the Work.
+ Further, Affirmer disclaims responsibility for obtaining any necessary
+ consents, permissions or other rights required for any use of the
+ Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to
+ this CC0 or use of the Work.
diff --git a/README b/README
deleted file mode 100644
index 9c9ca20..0000000
--- a/README
+++ /dev/null
@@ -1,49 +0,0 @@
-Go implementation of SipHash-2-4, a fast short-input PRF created by
-Jean-Philippe Aumasson and Daniel J. Bernstein (http://131002.net/siphash/).
-
-INSTALLATION
-
- $ go get github.com/dchest/siphash
-
-USAGE
-
- import "github.com/dchest/siphash"
-
-There are two ways to use this package.
-The slower one is to use the standard hash.Hash64 interface:
-
- h := siphash.New(key)
- h.Write([]byte("Hello"))
- sum := h.Sum(nil) // returns 8-byte []byte
-
-or
-
- sum64 := h.Sum64() // returns uint64
-
-The faster one is to use Hash() function, which takes two uint64 parts of
-16-byte key and a byte slice, and returns uint64 hash:
-
- sum64 := siphash.Hash(key0, key1, []byte("Hello"))
-
-The keys and output are little-endian.
-
-FUNCTIONS
-
-func Hash(k0, k1 uint64, p []byte) uint64
-
- Hash returns the 64-bit SipHash-2-4 of the given byte slice with two
- 64-bit parts of 128-bit key: k0 and k1.
-
-func New(key []byte) hash.Hash64
-
- New returns a new hash.Hash64 computing SipHash-2-4 with 16-byte key.
-
-
-PUBLIC DOMAIN DEDICATION
-
-Written in 2012 by Dmitry Chestnykh.
-
-To the extent possible under law, the author have dedicated all copyright
-and related and neighboring rights to this software to the public domain
-worldwide. This software is distributed without any warranty.
-http://creativecommons.org/publicdomain/zero/1.0/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5745ded
--- /dev/null
+++ b/README.md
@@ -0,0 +1,67 @@
+SipHash (Go)
+============
+
+Go implementation of SipHash-2-4, a fast short-input PRF created by
+Jean-Philippe Aumasson and Daniel J. Bernstein (http://131002.net/siphash/).
+
+
+## Installation
+
+ $ go get github.com/dchest/siphash
+
+## Usage
+
+ import "github.com/dchest/siphash"
+
+There are two ways to use this package.
+The slower one is to use the standard hash.Hash64 interface:
+
+ h := siphash.New(key)
+ h.Write([]byte("Hello"))
+ sum := h.Sum(nil) // returns 8-byte []byte
+
+or
+
+ sum64 := h.Sum64() // returns uint64
+
+The faster one is to use the Hash() function, which takes two uint64 parts of
+a 16-byte key and a byte slice, and returns a uint64 hash:
+
+ sum64 := siphash.Hash(key0, key1, []byte("Hello"))
+
+The keys and output are little-endian.
+
+
+## Functions
+
+### func Hash(k0, k1 uint64, p []byte) uint64
+
+Hash returns the 64-bit SipHash-2-4 of the given byte slice with two
+64-bit parts of 128-bit key: k0 and k1.
+
+### func Hash128(k0, k1 uint64, p []byte) (uint64, uint64)
+
+Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two
+64-bit parts of 128-bit key: k0 and k1.
+
+Note that 128-bit SipHash is considered experimental by SipHash authors at this time.
+
+### func New(key []byte) hash.Hash64
+
+New returns a new hash.Hash64 computing SipHash-2-4 with 16-byte key.
+
+### func New128(key []byte) hash.Hash
+
+New128 returns a new hash.Hash computing SipHash-2-4 with 16-byte key and 16-byte output.
+
+Note that 16-byte output is considered experimental by SipHash authors at this time.
+
+
+## Public domain dedication
+
+Written by Dmitry Chestnykh and Damian Gryski.
+
+To the extent possible under law, the authors have dedicated all copyright
+and related and neighboring rights to this software to the public domain
+worldwide. This software is distributed without any warranty.
+http://creativecommons.org/publicdomain/zero/1.0/
diff --git a/blocks.go b/blocks.go
new file mode 100644
index 0000000..14e763c
--- /dev/null
+++ b/blocks.go
@@ -0,0 +1,149 @@
+//go:build (!arm && !amd64) || appengine || gccgo
+// +build !arm,!amd64 appengine gccgo
+
+package siphash
+
+func once(d *digest) {
+ blocks(d, d.x[:])
+}
+
+func finalize(d *digest) uint64 {
+ d0 := *d
+ once(&d0)
+
+ v0, v1, v2, v3 := d0.v0, d0.v1, d0.v2, d0.v3
+ v2 ^= 0xff
+
+ // Round 1.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 2.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 3.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 4.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ return v0 ^ v1 ^ v2 ^ v3
+}
+
+func blocks(d *digest, p []uint8) {
+ v0, v1, v2, v3 := d.v0, d.v1, d.v2, d.v3
+
+ for len(p) >= BlockSize {
+ m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 |
+ uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
+
+ v3 ^= m
+
+ // Round 1.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 2.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ v0 ^= m
+
+ p = p[BlockSize:]
+ }
+
+ d.v0, d.v1, d.v2, d.v3 = v0, v1, v2, v3
+}
diff --git a/blocks_amd64.s b/blocks_amd64.s
new file mode 100644
index 0000000..2327866
--- /dev/null
+++ b/blocks_amd64.s
@@ -0,0 +1,87 @@
+//go:build amd64 && !appengine && !gccgo
+// +build amd64,!appengine,!gccgo
+
+#define ROUND(v0, v1, v2, v3) \
+ ADDQ v1, v0; \
+ RORQ $51, v1; \
+ ADDQ v3, v2; \
+ XORQ v0, v1; \
+ RORQ $48, v3; \
+ RORQ $32, v0; \
+ XORQ v2, v3; \
+ ADDQ v1, v2; \
+ ADDQ v3, v0; \
+ RORQ $43, v3; \
+ RORQ $47, v1; \
+ XORQ v0, v3; \
+ XORQ v2, v1; \
+ RORQ $32, v2
+
+// blocks(d *digest, p []uint8)
+TEXT ·blocks(SB),4,$0-32
+ MOVQ d+0(FP), BX
+ MOVQ 0(BX), R9 // R9 = v0
+ MOVQ 8(BX), R10 // R10 = v1
+ MOVQ 16(BX), R11 // R11 = v2
+ MOVQ 24(BX), R12 // R12 = v3
+ MOVQ p_base+8(FP), DI // DI = *uint64
+ MOVQ p_len+16(FP), SI // SI = nblocks
+ XORL DX, DX // DX = index (0)
+ SHRQ $3, SI // SI /= 8
+body:
+ CMPQ DX, SI
+ JGE end
+ MOVQ 0(DI)(DX*8), CX // CX = m
+ XORQ CX, R12
+ ROUND(R9, R10, R11, R12)
+ ROUND(R9, R10, R11, R12)
+ XORQ CX, R9
+ ADDQ $1, DX
+ JMP body
+end:
+ MOVQ R9, 0(BX)
+ MOVQ R10, 8(BX)
+ MOVQ R11, 16(BX)
+ MOVQ R12, 24(BX)
+ RET
+
+// once(d *digest)
+TEXT ·once(SB),4,$0-8
+ MOVQ d+0(FP), BX
+ MOVQ 0(BX), R9 // R9 = v0
+ MOVQ 8(BX), R10 // R10 = v1
+ MOVQ 16(BX), R11 // R11 = v2
+ MOVQ 24(BX), R12 // R12 = v3
+ MOVQ 48(BX), CX // CX = d.x[:]
+ XORQ CX, R12
+ ROUND(R9, R10, R11, R12)
+ ROUND(R9, R10, R11, R12)
+ XORQ CX, R9
+ MOVQ R9, 0(BX)
+ MOVQ R10, 8(BX)
+ MOVQ R11, 16(BX)
+ MOVQ R12, 24(BX)
+ RET
+
+// finalize(d *digest) uint64
+TEXT ·finalize(SB),4,$0-16
+ MOVQ d+0(FP), BX
+ MOVQ 0(BX), R9 // R9 = v0
+ MOVQ 8(BX), R10 // R10 = v1
+ MOVQ 16(BX), R11 // R11 = v2
+ MOVQ 24(BX), R12 // R12 = v3
+ MOVQ 48(BX), CX // CX = d.x[:]
+ XORQ CX, R12
+ ROUND(R9, R10, R11, R12)
+ ROUND(R9, R10, R11, R12)
+ XORQ CX, R9
+ NOTB R11
+ ROUND(R9, R10, R11, R12)
+ ROUND(R9, R10, R11, R12)
+ ROUND(R9, R10, R11, R12)
+ ROUND(R9, R10, R11, R12)
+ XORQ R12, R11
+ XORQ R10, R9
+ XORQ R11, R9
+ MOVQ R9, ret+8(FP)
+ RET
diff --git a/blocks_arm.s b/blocks_arm.s
new file mode 100644
index 0000000..adf5d67
--- /dev/null
+++ b/blocks_arm.s
@@ -0,0 +1,140 @@
+#include "textflag.h"
+
+#define ROUND()\
+ ADD.S R2,R0,R0;\
+ ADC R3,R1,R1;\
+ EOR R2<<13,R0,R8;\
+ EOR R3>>19,R8,R8;\
+ EOR R2>>19,R1,R11;\
+ EOR R3<<13,R11,R11;\
+ ADD.S R6,R4,R4;\
+ ADC R7,R5,R5;\
+ EOR R6<<16,R4,R2;\
+ EOR R7>>16,R2,R2;\
+ EOR R6>>16,R5,R3;\
+ EOR R7<<16,R3,R3;\
+ ADD.S R2,R1,R1;\
+ ADC R3,R0,R0;\
+ EOR R2<<21,R1,R6;\
+ EOR R3>>11,R6,R6;\
+ EOR R2>>11,R0,R7;\
+ EOR R3<<21,R7,R7;\
+ ADD.S R8,R4,R4;\
+ ADC R11,R5,R5;\
+ EOR R8<<17,R4,R2;\
+ EOR R11>>15,R2,R2;\
+ EOR R8>>15,R5,R3;\
+ EOR R11<<17,R3,R3;\
+ ADD.S R2,R1,R1;\
+ ADC R3,R0,R0;\
+ EOR R2<<13,R1,R8;\
+ EOR R3>>19,R8,R8;\
+ EOR R2>>19,R0,R11;\
+ EOR R3<<13,R11,R11;\
+ ADD.S R6,R5,R5;\
+ ADC R7,R4,R4;\
+ EOR R6<<16,R5,R2;\
+ EOR R7>>16,R2,R2;\
+ EOR R6>>16,R4,R3;\
+ EOR R7<<16,R3,R3;\
+ ADD.S R2,R0,R0;\
+ ADC R3,R1,R1;\
+ EOR R2<<21,R0,R6;\
+ EOR R3>>11,R6,R6;\
+ EOR R2>>11,R1,R7;\
+ EOR R3<<21,R7,R7;\
+ ADD.S R8,R5,R5;\
+ ADC R11,R4,R4;\
+ EOR R8<<17,R5,R2;\
+ EOR R11>>15,R2,R2;\
+ EOR R8>>15,R4,R3;\
+ EOR R11<<17,R3,R3;
+
+// once(d *digest)
+TEXT ·once(SB),NOSPLIT,$4-4
+ MOVW d+0(FP),R8
+ MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
+ MOVW 48(R8),R12
+ MOVW 52(R8),R14
+ EOR R12,R6,R6
+ EOR R14,R7,R7
+ ROUND()
+ EOR R12,R0,R0
+ EOR R14,R1,R1
+ MOVW d+0(FP),R8
+ MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
+ RET
+
+// finalize(d *digest) uint64
+TEXT ·finalize(SB),NOSPLIT,$4-12
+ MOVW d+0(FP),R8
+ MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
+ MOVW 48(R8),R12
+ MOVW 52(R8),R14
+ EOR R12,R6,R6
+ EOR R14,R7,R7
+ ROUND()
+ EOR R12,R0,R0
+ EOR R14,R1,R1
+ EOR $255,R4
+ ROUND()
+ ROUND()
+ EOR R2,R0,R0
+ EOR R3,R1,R1
+ EOR R6,R4,R4
+ EOR R7,R5,R5
+ EOR R4,R0,R0
+ EOR R5,R1,R1
+ MOVW R0,ret_lo+4(FP)
+ MOVW R1,ret_hi+8(FP)
+ RET
+
+// blocks(d *digest, p []uint8)
+TEXT ·blocks(SB),NOSPLIT,$4-16
+ MOVW d+0(FP),R8
+ MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
+ MOVW p+4(FP),R9
+ MOVW p_len+8(FP),R11
+ ADD R9,R11,R11
+ MOVW R11,endp-4(SP)
+ AND.S $3,R9,R8
+ BNE blocksunaligned
+blocksloop:
+ MOVM.IA.W (R9),[R12,R14]
+ EOR R12,R6,R6
+ EOR R14,R7,R7
+ ROUND()
+ EOR R12,R0,R0
+ EOR R14,R1,R1
+ MOVW endp-4(SP),R11
+ CMP R11,R9
+ BLO blocksloop
+ MOVW d+0(FP),R8
+ MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
+ RET
+blocksunaligned:
+ MOVBU.P 8(R9),R12
+ MOVBU -7(R9),R11
+ ORR R11<<8,R12,R12
+ MOVBU -6(R9),R11
+ ORR R11<<16,R12,R12
+ MOVBU -5(R9),R11
+ ORR R11<<24,R12,R12
+ MOVBU -4(R9),R14
+ MOVBU -3(R9),R11
+ ORR R11<<8,R14,R14
+ MOVBU -2(R9),R11
+ ORR R11<<16,R14,R14
+ MOVBU -1(R9),R11
+ ORR R11<<24,R14,R14
+ EOR R12,R6,R6
+ EOR R14,R7,R7
+ ROUND()
+ EOR R12,R0,R0
+ EOR R14,R1,R1
+ MOVW endp-4(SP),R11
+ CMP R11,R9
+ BLO blocksunaligned
+ MOVW d+0(FP),R8
+ MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
+ RET
diff --git a/blocks_asm.go b/blocks_asm.go
new file mode 100644
index 0000000..c14b35d
--- /dev/null
+++ b/blocks_asm.go
@@ -0,0 +1,22 @@
+//go:build arm || (amd64 && !appengine && !gccgo)
+// +build arm amd64,!appengine,!gccgo
+
+// Written in 2012 by Dmitry Chestnykh.
+//
+// To the extent possible under law, the author have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+// http://creativecommons.org/publicdomain/zero/1.0/
+
+// This file contains function declarations for the assembly implementations of blocks(), finalize() and once().
+
+package siphash
+
+//go:noescape
+func blocks(d *digest, p []uint8)
+
+//go:noescape
+func finalize(d *digest) uint64
+
+//go:noescape
+func once(d *digest)
diff --git a/debian/changelog b/debian/changelog
index 5fd8bfb..9bcf3ab 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+golang-siphash-dev (1.2.3-1) UNRELEASED; urgency=low
+
+ * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk> Tue, 27 Jun 2023 02:00:50 -0000
+
golang-siphash-dev (1.0.0-2) unstable; urgency=medium
[ Ximin Luo ]
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..3783991
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/dchest/siphash
+
+go 1.16
diff --git a/hash.go b/hash.go
index 4e8027d..8754416 100644
--- a/hash.go
+++ b/hash.go
@@ -1,3 +1,6 @@
+//go:build (!arm && !amd64) || appengine || gccgo
+// +build !arm,!amd64 appengine gccgo
+
// Written in 2012 by Dmitry Chestnykh.
//
// To the extent possible under law, the author have dedicated all copyright
diff --git a/hash128.go b/hash128.go
new file mode 100644
index 0000000..f92f773
--- /dev/null
+++ b/hash128.go
@@ -0,0 +1,304 @@
+//go:build (!arm && !amd64) || appengine || gccgo
+// +build !arm,!amd64 appengine gccgo
+
+// Written in 2012 by Dmitry Chestnykh.
+// Modifications 2014 for 128-bit hash function by Damian Gryski.
+//
+// To the extent possible under law, the authors have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+// http://creativecommons.org/publicdomain/zero/1.0/
+
+package siphash
+
+// Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two 64-bit
+// parts of 128-bit key: k0 and k1.
+//
+// Note that 128-bit SipHash is considered experimental by SipHash authors at this time.
+func Hash128(k0, k1 uint64, p []byte) (uint64, uint64) {
+ // Initialization.
+ v0 := k0 ^ 0x736f6d6570736575
+ v1 := k1 ^ 0x646f72616e646f6d
+ v2 := k0 ^ 0x6c7967656e657261
+ v3 := k1 ^ 0x7465646279746573
+ t := uint64(len(p)) << 56
+
+ v1 ^= 0xee
+
+ // Compression.
+ for len(p) >= BlockSize {
+ m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 |
+ uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
+ v3 ^= m
+
+ // Round 1.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 2.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ v0 ^= m
+ p = p[BlockSize:]
+ }
+
+ // Compress last block.
+ switch len(p) {
+ case 7:
+ t |= uint64(p[6]) << 48
+ fallthrough
+ case 6:
+ t |= uint64(p[5]) << 40
+ fallthrough
+ case 5:
+ t |= uint64(p[4]) << 32
+ fallthrough
+ case 4:
+ t |= uint64(p[3]) << 24
+ fallthrough
+ case 3:
+ t |= uint64(p[2]) << 16
+ fallthrough
+ case 2:
+ t |= uint64(p[1]) << 8
+ fallthrough
+ case 1:
+ t |= uint64(p[0])
+ }
+
+ v3 ^= t
+
+ // Round 1.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 2.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ v0 ^= t
+
+ // Finalization.
+ v2 ^= 0xee
+
+ // Round 1.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 2.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 3.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 4.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ r0 := v0 ^ v1 ^ v2 ^ v3
+
+ v1 ^= 0xdd
+
+ // Round 1.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 2.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 3.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 4.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ r1 := v0 ^ v1 ^ v2 ^ v3
+
+ return r0, r1
+}
diff --git a/hash128_amd64.s b/hash128_amd64.s
new file mode 100644
index 0000000..230cdf2
--- /dev/null
+++ b/hash128_amd64.s
@@ -0,0 +1,288 @@
+//go:build amd64 && !appengine && !gccgo
+// +build amd64,!appengine,!gccgo
+
+// This is a translation of the gcc output of FloodyBerry's pure-C public
+// domain siphash implementation at https://github.com/floodyberry/siphash
+
+// This assembly code has been modified from the 64-bit output to the experimental 128-bit output.
+
+// SI = v0
+// AX = v1
+// CX = v2
+// DX = v3
+
+// func Hash128(k0, k1 uint64, b []byte) (r0 uint64, r1 uint64)
+TEXT ·Hash128(SB),4,$0-56
+ MOVQ k0+0(FP),CX
+ MOVQ $0x736F6D6570736575,R9
+ MOVQ k1+8(FP),DI
+ MOVQ $0x6C7967656E657261,BX
+ MOVQ $0x646F72616E646F6D,AX
+ MOVQ b_len+24(FP),DX
+ XORQ $0xEE,AX
+ MOVQ DX,R11
+ MOVQ DX,R10
+ XORQ CX,R9
+ XORQ CX,BX
+ MOVQ $0x7465646279746573,CX
+ XORQ DI,AX
+ XORQ DI,CX
+ SHLQ $0x38,R11
+ XORQ DI,DI
+ MOVQ b_base+16(FP),SI
+ ANDQ $0xFFFFFFFFFFFFFFF8,R10
+ JE afterLoop
+ XCHGQ AX,AX
+loopBody:
+ MOVQ 0(SI)(DI*1),R8
+ ADDQ AX,R9
+ RORQ $0x33,AX
+ XORQ R9,AX
+ RORQ $0x20,R9
+ ADDQ $0x8,DI
+ XORQ R8,CX
+ ADDQ CX,BX
+ RORQ $0x30,CX
+ XORQ BX,CX
+ ADDQ AX,BX
+ RORQ $0x2F,AX
+ ADDQ CX,R9
+ RORQ $0x2B,CX
+ XORQ BX,AX
+ XORQ R9,CX
+ RORQ $0x20,BX
+ ADDQ AX,R9
+ ADDQ CX,BX
+ RORQ $0x33,AX
+ RORQ $0x30,CX
+ XORQ R9,AX
+ XORQ BX,CX
+ RORQ $0x20,R9
+ ADDQ AX,BX
+ ADDQ CX,R9
+ RORQ $0x2F,AX
+ RORQ $0x2B,CX
+ XORQ BX,AX
+ RORQ $0x20,BX
+ XORQ R9,CX
+ XORQ R8,R9
+ CMPQ R10,DI
+ JA loopBody
+afterLoop:
+ ANDL $7, DX
+ JZ afterSwitch
+
+ // no support for jump tables
+
+ CMPQ DX,$0x7
+ JE sw7
+
+ CMPQ DX,$0x6
+ JE sw6
+
+ CMPQ DX,$0x5
+ JE sw5
+
+ CMPQ DX,$0x4
+ JE sw4
+
+ CMPQ DX,$0x3
+ JE sw3
+
+ CMPQ DX,$0x2
+ JE sw2
+
+ JMP sw1
+
+sw7: MOVBQZX 6(SI)(DI*1),DX
+ SHLQ $0x30,DX
+ ORQ DX,R11
+sw6: MOVBQZX 0x5(SI)(DI*1),DX
+ SHLQ $0x28,DX
+ ORQ DX,R11
+sw5: MOVBQZX 0x4(SI)(DI*1),DX
+ SHLQ $0x20,DX
+ ORQ DX,R11
+sw4: MOVBQZX 0x3(SI)(DI*1),DX
+ SHLQ $0x18,DX
+ ORQ DX,R11
+sw3: MOVBQZX 0x2(SI)(DI*1),DX
+ SHLQ $0x10,DX
+ ORQ DX,R11
+sw2: MOVBQZX 0x1(SI)(DI*1),DX
+ SHLQ $0x8,DX
+ ORQ DX,R11
+sw1: MOVBQZX 0(SI)(DI*1),DX
+ ORQ DX,R11
+afterSwitch:
+ LEAQ (AX)(R9*1),SI
+ XORQ R11,CX
+ RORQ $0x33,AX
+ ADDQ CX,BX
+ MOVQ CX,DX
+ XORQ SI,AX
+ RORQ $0x30,DX
+ RORQ $0x20,SI
+ LEAQ 0(BX)(AX*1),CX
+ XORQ BX,DX
+ RORQ $0x2F,AX
+ ADDQ DX,SI
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+ ADDQ AX,SI
+ RORQ $0x33,AX
+ ADDQ DX,CX
+ XORQ SI,AX
+ RORQ $0x30,DX
+ RORQ $0x20,SI
+ XORQ CX,DX
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ ADDQ DX,SI
+ XORQ CX,AX
+ RORQ $0x2B,DX
+ RORQ $0x20,CX
+ XORQ SI,DX
+ XORQ R11,SI
+ XORB $0xEE,CL
+ ADDQ AX,SI
+ RORQ $0x33,AX
+ ADDQ DX,CX
+ RORQ $0x30,DX
+ XORQ SI,AX
+ XORQ CX,DX
+ RORQ $0x20,SI
+ ADDQ AX,CX
+ ADDQ DX,SI
+ RORQ $0x2F,AX
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+ ADDQ AX,SI
+ ADDQ DX,CX
+ RORQ $0x33,AX
+ RORQ $0x30,DX
+ XORQ SI,AX
+ RORQ $0x20,SI
+ XORQ CX,DX
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ ADDQ DX,SI
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+ ADDQ AX,SI
+ ADDQ DX,CX
+ RORQ $0x33,AX
+ RORQ $0x30,DX
+ XORQ CX,DX
+ XORQ SI,AX
+ RORQ $0x20,SI
+ ADDQ DX,SI
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ XORQ CX,AX
+ RORQ $0x2B,DX
+ RORQ $0x20,CX
+ XORQ SI,DX
+
+ // gcc optimized the tail end of this function differently. However,
+ // we need to preserve our registers to carry out the second stage of
+ // the finalization. This is a duplicate of an earlier finalization
+ // round.
+
+ ADDQ AX,SI
+ RORQ $0x33,AX
+ ADDQ DX,CX
+ RORQ $0x30,DX
+ XORQ SI,AX
+ XORQ CX,DX
+ RORQ $0x20,SI
+ ADDQ AX,CX
+ ADDQ DX,SI
+ RORQ $0x2F,AX
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+
+ // Stuff the result into BX instead of AX as gcc had done
+
+ MOVQ SI,BX
+ XORQ AX,BX
+ XORQ DX,BX
+ XORQ CX,BX
+ MOVQ BX,ret+40(FP)
+
+ // Start the second finalization round
+
+ XORB $0xDD,AL
+ ADDQ AX,SI
+ RORQ $0x33,AX
+ ADDQ DX,CX
+ RORQ $0x30,DX
+ XORQ SI,AX
+ XORQ CX,DX
+ RORQ $0x20,SI
+ ADDQ AX,CX
+ ADDQ DX,SI
+ RORQ $0x2F,AX
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+ ADDQ AX,SI
+ ADDQ DX,CX
+ RORQ $0x33,AX
+ RORQ $0x30,DX
+ XORQ SI,AX
+ RORQ $0x20,SI
+ XORQ CX,DX
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ ADDQ DX,SI
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+ ADDQ AX,SI
+ ADDQ DX,CX
+ RORQ $0x33,AX
+ RORQ $0x30,DX
+ XORQ CX,DX
+ XORQ SI,AX
+ RORQ $0x20,SI
+ ADDQ DX,SI
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ XORQ CX,AX
+ RORQ $0x2B,DX
+ RORQ $0x20,CX
+ XORQ SI,DX
+
+ ADDQ AX,SI
+ RORQ $0x33,AX
+ ADDQ DX,CX
+ RORQ $0x30,DX
+ XORQ SI,AX
+ XORQ CX,DX
+ RORQ $0x20,SI
+ ADDQ AX,CX
+ ADDQ DX,SI
+ RORQ $0x2F,AX
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+
+ MOVQ SI,BX
+ XORQ AX,BX
+ XORQ DX,BX
+ XORQ CX,BX
+ MOVQ BX,ret1+48(FP)
+
+ RET
diff --git a/hash_amd64.s b/hash_amd64.s
new file mode 100644
index 0000000..b3b87ee
--- /dev/null
+++ b/hash_amd64.s
@@ -0,0 +1,197 @@
+//go:build amd64 && !appengine && !gccgo
+// +build amd64,!appengine,!gccgo
+
+// This is a translation of the gcc output of FloodyBerry's pure-C public
+// domain siphash implementation at https://github.com/floodyberry/siphash
+// func Hash(k0, k1 uint64, b []byte) uint64
+TEXT ·Hash(SB),4,$0-48
+ MOVQ k0+0(FP),CX
+ MOVQ $0x736F6D6570736575,R9
+ MOVQ k1+8(FP),DI
+ MOVQ $0x6C7967656E657261,BX
+ MOVQ $0x646F72616E646F6D,AX
+ MOVQ b_len+24(FP),DX
+ MOVQ DX,R11
+ MOVQ DX,R10
+ XORQ CX,R9
+ XORQ CX,BX
+ MOVQ $0x7465646279746573,CX
+ XORQ DI,AX
+ XORQ DI,CX
+ SHLQ $0x38,R11
+ XORQ DI,DI
+ MOVQ b_base+16(FP),SI
+ ANDQ $0xFFFFFFFFFFFFFFF8,R10
+ JE afterLoop
+ XCHGQ AX,AX
+loopBody:
+ MOVQ 0(SI)(DI*1),R8
+ ADDQ AX,R9
+ RORQ $0x33,AX
+ XORQ R9,AX
+ RORQ $0x20,R9
+ ADDQ $0x8,DI
+ XORQ R8,CX
+ ADDQ CX,BX
+ RORQ $0x30,CX
+ XORQ BX,CX
+ ADDQ AX,BX
+ RORQ $0x2F,AX
+ ADDQ CX,R9
+ RORQ $0x2B,CX
+ XORQ BX,AX
+ XORQ R9,CX
+ RORQ $0x20,BX
+ ADDQ AX,R9
+ ADDQ CX,BX
+ RORQ $0x33,AX
+ RORQ $0x30,CX
+ XORQ R9,AX
+ XORQ BX,CX
+ RORQ $0x20,R9
+ ADDQ AX,BX
+ ADDQ CX,R9
+ RORQ $0x2F,AX
+ RORQ $0x2B,CX
+ XORQ BX,AX
+ RORQ $0x20,BX
+ XORQ R9,CX
+ XORQ R8,R9
+ CMPQ R10,DI
+ JA loopBody
+afterLoop:
+ ANDL $7, DX
+ JZ afterSwitch
+
+ // no support for jump tables
+
+ CMPQ DX,$0x7
+ JE sw7
+
+ CMPQ DX,$0x6
+ JE sw6
+
+ CMPQ DX,$0x5
+ JE sw5
+
+ CMPQ DX,$0x4
+ JE sw4
+
+ CMPQ DX,$0x3
+ JE sw3
+
+ CMPQ DX,$0x2
+ JE sw2
+
+ JMP sw1
+
+sw7: MOVBQZX 6(SI)(DI*1),DX
+ SHLQ $0x30,DX
+ ORQ DX,R11
+sw6: MOVBQZX 0x5(SI)(DI*1),DX
+ SHLQ $0x28,DX
+ ORQ DX,R11
+sw5: MOVBQZX 0x4(SI)(DI*1),DX
+ SHLQ $0x20,DX
+ ORQ DX,R11
+sw4: MOVBQZX 0x3(SI)(DI*1),DX
+ SHLQ $0x18,DX
+ ORQ DX,R11
+sw3: MOVBQZX 0x2(SI)(DI*1),DX
+ SHLQ $0x10,DX
+ ORQ DX,R11
+sw2: MOVBQZX 0x1(SI)(DI*1),DX
+ SHLQ $0x8,DX
+ ORQ DX,R11
+sw1: MOVBQZX 0(SI)(DI*1),DX
+ ORQ DX,R11
+afterSwitch:
+ LEAQ (AX)(R9*1),SI
+ XORQ R11,CX
+ RORQ $0x33,AX
+ ADDQ CX,BX
+ MOVQ CX,DX
+ XORQ SI,AX
+ RORQ $0x30,DX
+ RORQ $0x20,SI
+ LEAQ 0(BX)(AX*1),CX
+ XORQ BX,DX
+ RORQ $0x2F,AX
+ ADDQ DX,SI
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+ ADDQ AX,SI
+ RORQ $0x33,AX
+ ADDQ DX,CX
+ XORQ SI,AX
+ RORQ $0x30,DX
+ RORQ $0x20,SI
+ XORQ CX,DX
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ ADDQ DX,SI
+ XORQ CX,AX
+ RORQ $0x2B,DX
+ RORQ $0x20,CX
+ XORQ SI,DX
+ XORQ R11,SI
+ XORB $0xFF,CL
+ ADDQ AX,SI
+ RORQ $0x33,AX
+ ADDQ DX,CX
+ RORQ $0x30,DX
+ XORQ SI,AX
+ XORQ CX,DX
+ RORQ $0x20,SI
+ ADDQ AX,CX
+ ADDQ DX,SI
+ RORQ $0x2F,AX
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+ ADDQ AX,SI
+ ADDQ DX,CX
+ RORQ $0x33,AX
+ RORQ $0x30,DX
+ XORQ SI,AX
+ RORQ $0x20,SI
+ XORQ CX,DX
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ ADDQ DX,SI
+ RORQ $0x2B,DX
+ XORQ CX,AX
+ XORQ SI,DX
+ RORQ $0x20,CX
+ ADDQ AX,SI
+ ADDQ DX,CX
+ RORQ $0x33,AX
+ RORQ $0x30,DX
+ XORQ CX,DX
+ XORQ SI,AX
+ RORQ $0x20,SI
+ ADDQ DX,SI
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ XORQ CX,AX
+ RORQ $0x2B,DX
+ RORQ $0x20,CX
+ XORQ SI,DX
+ ADDQ AX,SI
+ RORQ $0x33,AX
+ ADDQ DX,CX
+ XORQ SI,AX
+ RORQ $0x30,DX
+ XORQ CX,DX
+ ADDQ AX,CX
+ RORQ $0x2F,AX
+ XORQ CX,AX
+ RORQ $0x2B,DX
+ RORQ $0x20,CX
+ XORQ DX,AX
+ XORQ CX,AX
+ MOVQ AX,ret+40(FP)
+ RET
diff --git a/hash_arm.go b/hash_arm.go
new file mode 100644
index 0000000..e52f86c
--- /dev/null
+++ b/hash_arm.go
@@ -0,0 +1,28 @@
+//go:build arm
+// +build arm
+
+package siphash
+
+// NB: the ARM implementation forgoes extra speed for Hash()
+// and Hash128() by simply reusing the same blocks() implementation
+// in assembly used by the streaming hash.
+
+func Hash(k0, k1 uint64, p []byte) uint64 {
+ var d digest
+ d.size = Size
+ d.k0 = k0
+ d.k1 = k1
+ d.Reset()
+ d.Write(p)
+ return d.Sum64()
+}
+
+func Hash128(k0, k1 uint64, p []byte) (uint64, uint64) {
+ var d digest
+ d.size = Size128
+ d.k0 = k0
+ d.k1 = k1
+ d.Reset()
+ d.Write(p)
+ return d.sum128()
+}
diff --git a/hash_asm.go b/hash_asm.go
new file mode 100644
index 0000000..c29fb49
--- /dev/null
+++ b/hash_asm.go
@@ -0,0 +1,25 @@
+//go:build amd64 && !appengine && !gccgo
+// +build amd64,!appengine,!gccgo
+
+// Written in 2012 by Dmitry Chestnykh.
+//
+// To the extent possible under law, the author have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+// http://creativecommons.org/publicdomain/zero/1.0/
+
+// This file contains function declarations for use with the assembly implementations of Hash() and Hash128().
+
+package siphash
+
+//go:noescape
+
+// Hash returns the 64-bit SipHash-2-4 of the given byte slice with two 64-bit
+// parts of 128-bit key: k0 and k1.
+func Hash(k0, k1 uint64, b []byte) uint64
+
+//go:noescape
+
+// Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two
+// 64-bit parts of 128-bit key: k0 and k1.
+func Hash128(k0, k1 uint64, b []byte) (uint64, uint64)
diff --git a/siphash.go b/siphash.go
index 89d40ad..4a3cb49 100644
--- a/siphash.go
+++ b/siphash.go
@@ -1,4 +1,4 @@
-// Written in 2012 by Dmitry Chestnykh.
+// Written in 2012-2014 by Dmitry Chestnykh.
//
// To the extent possible under law, the author have dedicated all copyright
// and related and neighboring rights to this software to the public domain
@@ -12,34 +12,52 @@ package siphash
import "hash"
const (
- // The block size of hash algorithm in bytes.
+ // BlockSize is the block size of hash algorithm in bytes.
BlockSize = 8
- // The size of hash output in bytes.
+
+ // Size is the size of hash output in bytes.
Size = 8
+
+ // Size128 is the size of 128-bit hash output in bytes.
+ Size128 = 16
)
type digest struct {
v0, v1, v2, v3 uint64 // state
k0, k1 uint64 // two parts of key
- t uint8 // message bytes counter (mod 256)
- nx int // number of bytes in buffer x
x [8]byte // buffer for unprocessed bytes
+ nx int // number of bytes in buffer x
+ size int // output size in bytes (8 or 16)
+ t uint8 // message bytes counter (mod 256)
}
-// New returns a new hash.Hash64 computing SipHash-2-4 with 16-byte key.
-func New(key []byte) hash.Hash64 {
+// newDigest returns a new digest with the given output size in bytes (must be 8 or 16).
+func newDigest(size int, key []byte) *digest {
+ if size != Size && size != Size128 {
+ panic("size must be 8 or 16")
+ }
d := new(digest)
-
d.k0 = uint64(key[0]) | uint64(key[1])<<8 | uint64(key[2])<<16 | uint64(key[3])<<24 |
uint64(key[4])<<32 | uint64(key[5])<<40 | uint64(key[6])<<48 | uint64(key[7])<<56
-
d.k1 = uint64(key[8]) | uint64(key[9])<<8 | uint64(key[10])<<16 | uint64(key[11])<<24 |
uint64(key[12])<<32 | uint64(key[13])<<40 | uint64(key[14])<<48 | uint64(key[15])<<56
-
+ d.size = size
d.Reset()
return d
}
+// New returns a new hash.Hash64 computing SipHash-2-4 with 16-byte key and 8-byte output.
+func New(key []byte) hash.Hash64 {
+ return newDigest(Size, key)
+}
+
+// New128 returns a new hash.Hash computing SipHash-2-4 with 16-byte key and 16-byte output.
+//
+// Note that 16-byte output is considered experimental by SipHash authors at this time.
+func New128(key []byte) hash.Hash {
+ return newDigest(Size128, key)
+}
+
func (d *digest) Reset() {
d.v0 = d.k0 ^ 0x736f6d6570736575
d.v1 = d.k1 ^ 0x646f72616e646f6d
@@ -47,67 +65,15 @@ func (d *digest) Reset() {
d.v3 = d.k1 ^ 0x7465646279746573
d.t = 0
d.nx = 0
+ if d.size == Size128 {
+ d.v1 ^= 0xee
+ }
}
-func (d *digest) Size() int { return Size }
+func (d *digest) Size() int { return d.size }
func (d *digest) BlockSize() int { return BlockSize }
-func blocks(d *digest, p []uint8) {
- v0, v1, v2, v3 := d.v0, d.v1, d.v2, d.v3
-
- for len(p) >= BlockSize {
- m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 |
- uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
-
- v3 ^= m
-
- // Round 1.
- v0 += v1
- v1 = v1<<13 | v1>>(64-13)
- v1 ^= v0
- v0 = v0<<32 | v0>>(64-32)
-
- v2 += v3
- v3 = v3<<16 | v3>>(64-16)
- v3 ^= v2
-
- v0 += v3
- v3 = v3<<21 | v3>>(64-21)
- v3 ^= v0
-
- v2 += v1
- v1 = v1<<17 | v1>>(64-17)
- v1 ^= v2
- v2 = v2<<32 | v2>>(64-32)
-
- // Round 2.
- v0 += v1
- v1 = v1<<13 | v1>>(64-13)
- v1 ^= v0
- v0 = v0<<32 | v0>>(64-32)
-
- v2 += v3
- v3 = v3<<16 | v3>>(64-16)
- v3 ^= v2
-
- v0 += v3
- v3 = v3<<21 | v3>>(64-21)
- v3 ^= v0
-
- v2 += v1
- v1 = v1<<17 | v1>>(64-17)
- v1 ^= v2
- v2 = v2<<32 | v2>>(64-32)
-
- v0 ^= m
-
- p = p[BlockSize:]
- }
-
- d.v0, d.v1, d.v2, d.v3 = v0, v1, v2, v3
-}
-
func (d *digest) Write(p []byte) (nn int, err error) {
nn = len(p)
d.t += uint8(nn)
@@ -118,7 +84,7 @@ func (d *digest) Write(p []byte) (nn int, err error) {
}
d.nx += copy(d.x[d.nx:], p)
if d.nx == BlockSize {
- blocks(d, d.x[:])
+ once(d)
d.nx = 0
}
p = p[n:]
@@ -134,7 +100,15 @@ func (d *digest) Write(p []byte) (nn int, err error) {
return
}
-func (d0 *digest) Sum64() uint64 {
+func (d *digest) Sum64() uint64 {
+ for i := d.nx; i < BlockSize-1; i++ {
+ d.x[i] = 0
+ }
+ d.x[7] = d.t
+ return finalize(d)
+}
+
+func (d0 *digest) sum128() (r0, r1 uint64) {
// Make a copy of d0 so that caller can keep writing and summing.
d := *d0
@@ -145,7 +119,7 @@ func (d0 *digest) Sum64() uint64 {
blocks(&d, d.x[:])
v0, v1, v2, v3 := d.v0, d.v1, d.v2, d.v3
- v2 ^= 0xff
+ v2 ^= 0xee
// Round 1.
v0 += v1
@@ -223,12 +197,122 @@ func (d0 *digest) Sum64() uint64 {
v1 ^= v2
v2 = v2<<32 | v2>>(64-32)
- return v0 ^ v1 ^ v2 ^ v3
+ r0 = v0 ^ v1 ^ v2 ^ v3
+
+ v1 ^= 0xdd
+
+ // Round 1.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 2.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 3.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ // Round 4.
+ v0 += v1
+ v1 = v1<<13 | v1>>(64-13)
+ v1 ^= v0
+ v0 = v0<<32 | v0>>(64-32)
+
+ v2 += v3
+ v3 = v3<<16 | v3>>(64-16)
+ v3 ^= v2
+
+ v0 += v3
+ v3 = v3<<21 | v3>>(64-21)
+ v3 ^= v0
+
+ v2 += v1
+ v1 = v1<<17 | v1>>(64-17)
+ v1 ^= v2
+ v2 = v2<<32 | v2>>(64-32)
+
+ r1 = v0 ^ v1 ^ v2 ^ v3
+
+ return r0, r1
}
func (d *digest) Sum(in []byte) []byte {
- v := d.Sum64()
- in = append(in, byte(v), byte(v>>8), byte(v>>16), byte(v>>24),
- byte(v>>32), byte(v>>40), byte(v>>48), byte(v>>56))
+ if d.size == Size {
+ r := d.Sum64()
+ in = append(in,
+ byte(r),
+ byte(r>>8),
+ byte(r>>16),
+ byte(r>>24),
+ byte(r>>32),
+ byte(r>>40),
+ byte(r>>48),
+ byte(r>>56))
+ } else {
+ r0, r1 := d.sum128()
+ in = append(in,
+ byte(r0),
+ byte(r0>>8),
+ byte(r0>>16),
+ byte(r0>>24),
+ byte(r0>>32),
+ byte(r0>>40),
+ byte(r0>>48),
+ byte(r0>>56),
+ byte(r1),
+ byte(r1>>8),
+ byte(r1>>16),
+ byte(r1>>24),
+ byte(r1>>32),
+ byte(r1>>40),
+ byte(r1>>48),
+ byte(r1>>56))
+ }
return in
}
diff --git a/siphash_test.go b/siphash_test.go
index 5e01542..843b051 100644
--- a/siphash_test.go
+++ b/siphash_test.go
@@ -10,6 +10,7 @@ package siphash
import (
"bytes"
"encoding/binary"
+ "encoding/hex"
"testing"
)
@@ -130,6 +131,73 @@ var goldenRef = [][]byte{
{0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95},
}
+var goldenRef128 = [][]byte{
+ {0xa3, 0x81, 0x7f, 0x04, 0xba, 0x25, 0xa8, 0xe6, 0x6d, 0xf6, 0x72, 0x14, 0xc7, 0x55, 0x02, 0x93},
+ {0xda, 0x87, 0xc1, 0xd8, 0x6b, 0x99, 0xaf, 0x44, 0x34, 0x76, 0x59, 0x11, 0x9b, 0x22, 0xfc, 0x45},
+ {0x81, 0x77, 0x22, 0x8d, 0xa4, 0xa4, 0x5d, 0xc7, 0xfc, 0xa3, 0x8b, 0xde, 0xf6, 0x0a, 0xff, 0xe4},
+ {0x9c, 0x70, 0xb6, 0x0c, 0x52, 0x67, 0xa9, 0x4e, 0x5f, 0x33, 0xb6, 0xb0, 0x29, 0x85, 0xed, 0x51},
+ {0xf8, 0x81, 0x64, 0xc1, 0x2d, 0x9c, 0x8f, 0xaf, 0x7d, 0x0f, 0x6e, 0x7c, 0x7b, 0xcd, 0x55, 0x79},
+ {0x13, 0x68, 0x87, 0x59, 0x80, 0x77, 0x6f, 0x88, 0x54, 0x52, 0x7a, 0x07, 0x69, 0x0e, 0x96, 0x27},
+ {0x14, 0xee, 0xca, 0x33, 0x8b, 0x20, 0x86, 0x13, 0x48, 0x5e, 0xa0, 0x30, 0x8f, 0xd7, 0xa1, 0x5e},
+ {0xa1, 0xf1, 0xeb, 0xbe, 0xd8, 0xdb, 0xc1, 0x53, 0xc0, 0xb8, 0x4a, 0xa6, 0x1f, 0xf0, 0x82, 0x39},
+ {0x3b, 0x62, 0xa9, 0xba, 0x62, 0x58, 0xf5, 0x61, 0x0f, 0x83, 0xe2, 0x64, 0xf3, 0x14, 0x97, 0xb4},
+ {0x26, 0x44, 0x99, 0x06, 0x0a, 0xd9, 0xba, 0xab, 0xc4, 0x7f, 0x8b, 0x02, 0xbb, 0x6d, 0x71, 0xed},
+ {0x00, 0x11, 0x0d, 0xc3, 0x78, 0x14, 0x69, 0x56, 0xc9, 0x54, 0x47, 0xd3, 0xf3, 0xd0, 0xfb, 0xba},
+ {0x01, 0x51, 0xc5, 0x68, 0x38, 0x6b, 0x66, 0x77, 0xa2, 0xb4, 0xdc, 0x6f, 0x81, 0xe5, 0xdc, 0x18},
+ {0xd6, 0x26, 0xb2, 0x66, 0x90, 0x5e, 0xf3, 0x58, 0x82, 0x63, 0x4d, 0xf6, 0x85, 0x32, 0xc1, 0x25},
+ {0x98, 0x69, 0xe2, 0x47, 0xe9, 0xc0, 0x8b, 0x10, 0xd0, 0x29, 0x93, 0x4f, 0xc4, 0xb9, 0x52, 0xf7},
+ {0x31, 0xfc, 0xef, 0xac, 0x66, 0xd7, 0xde, 0x9c, 0x7e, 0xc7, 0x48, 0x5f, 0xe4, 0x49, 0x49, 0x02},
+ {0x54, 0x93, 0xe9, 0x99, 0x33, 0xb0, 0xa8, 0x11, 0x7e, 0x08, 0xec, 0x0f, 0x97, 0xcf, 0xc3, 0xd9},
+ {0x6e, 0xe2, 0xa4, 0xca, 0x67, 0xb0, 0x54, 0xbb, 0xfd, 0x33, 0x15, 0xbf, 0x85, 0x23, 0x05, 0x77},
+ {0x47, 0x3d, 0x06, 0xe8, 0x73, 0x8d, 0xb8, 0x98, 0x54, 0xc0, 0x66, 0xc4, 0x7a, 0xe4, 0x77, 0x40},
+ {0xa4, 0x26, 0xe5, 0xe4, 0x23, 0xbf, 0x48, 0x85, 0x29, 0x4d, 0xa4, 0x81, 0xfe, 0xae, 0xf7, 0x23},
+ {0x78, 0x01, 0x77, 0x31, 0xcf, 0x65, 0xfa, 0xb0, 0x74, 0xd5, 0x20, 0x89, 0x52, 0x51, 0x2e, 0xb1},
+ {0x9e, 0x25, 0xfc, 0x83, 0x3f, 0x22, 0x90, 0x73, 0x3e, 0x93, 0x44, 0xa5, 0xe8, 0x38, 0x39, 0xeb},
+ {0x56, 0x8e, 0x49, 0x5a, 0xbe, 0x52, 0x5a, 0x21, 0x8a, 0x22, 0x14, 0xcd, 0x3e, 0x07, 0x1d, 0x12},
+ {0x4a, 0x29, 0xb5, 0x45, 0x52, 0xd1, 0x6b, 0x9a, 0x46, 0x9c, 0x10, 0x52, 0x8e, 0xff, 0x0a, 0xae},
+ {0xc9, 0xd1, 0x84, 0xdd, 0xd5, 0xa9, 0xf5, 0xe0, 0xcf, 0x8c, 0xe2, 0x9a, 0x9a, 0xbf, 0x69, 0x1c},
+ {0x2d, 0xb4, 0x79, 0xae, 0x78, 0xbd, 0x50, 0xd8, 0x88, 0x2a, 0x8a, 0x17, 0x8a, 0x61, 0x32, 0xad},
+ {0x8e, 0xce, 0x5f, 0x04, 0x2d, 0x5e, 0x44, 0x7b, 0x50, 0x51, 0xb9, 0xea, 0xcb, 0x8d, 0x8f, 0x6f},
+ {0x9c, 0x0b, 0x53, 0xb4, 0xb3, 0xc3, 0x07, 0xe8, 0x7e, 0xae, 0xe0, 0x86, 0x78, 0x14, 0x1f, 0x66},
+ {0xab, 0xf2, 0x48, 0xaf, 0x69, 0xa6, 0xea, 0xe4, 0xbf, 0xd3, 0xeb, 0x2f, 0x12, 0x9e, 0xeb, 0x94},
+ {0x06, 0x64, 0xda, 0x16, 0x68, 0x57, 0x4b, 0x88, 0xb9, 0x35, 0xf3, 0x02, 0x73, 0x58, 0xae, 0xf4},
+ {0xaa, 0x4b, 0x9d, 0xc4, 0xbf, 0x33, 0x7d, 0xe9, 0x0c, 0xd4, 0xfd, 0x3c, 0x46, 0x7c, 0x6a, 0xb7},
+ {0xea, 0x5c, 0x7f, 0x47, 0x1f, 0xaf, 0x6b, 0xde, 0x2b, 0x1a, 0xd7, 0xd4, 0x68, 0x6d, 0x22, 0x87},
+ {0x29, 0x39, 0xb0, 0x18, 0x32, 0x23, 0xfa, 0xfc, 0x17, 0x23, 0xde, 0x4f, 0x52, 0xc4, 0x3d, 0x35},
+ {0x7c, 0x39, 0x56, 0xca, 0x5e, 0xea, 0xfc, 0x3e, 0x36, 0x3e, 0x9d, 0x55, 0x65, 0x46, 0xeb, 0x68},
+ {0x77, 0xc6, 0x07, 0x71, 0x46, 0xf0, 0x1c, 0x32, 0xb6, 0xb6, 0x9d, 0x5f, 0x4e, 0xa9, 0xff, 0xcf},
+ {0x37, 0xa6, 0x98, 0x6c, 0xb8, 0x84, 0x7e, 0xdf, 0x09, 0x25, 0xf0, 0xf1, 0x30, 0x9b, 0x54, 0xde},
+ {0xa7, 0x05, 0xf0, 0xe6, 0x9d, 0xa9, 0xa8, 0xf9, 0x07, 0x24, 0x1a, 0x2e, 0x92, 0x3c, 0x8c, 0xc8},
+ {0x3d, 0xc4, 0x7d, 0x1f, 0x29, 0xc4, 0x48, 0x46, 0x1e, 0x9e, 0x76, 0xed, 0x90, 0x4f, 0x67, 0x11},
+ {0x0d, 0x62, 0xbf, 0x01, 0xe6, 0xfc, 0x0e, 0x1a, 0x0d, 0x3c, 0x47, 0x51, 0xc5, 0xd3, 0x69, 0x2b},
+ {0x8c, 0x03, 0x46, 0x8b, 0xca, 0x7c, 0x66, 0x9e, 0xe4, 0xfd, 0x5e, 0x08, 0x4b, 0xbe, 0xe7, 0xb5},
+ {0x52, 0x8a, 0x5b, 0xb9, 0x3b, 0xaf, 0x2c, 0x9c, 0x44, 0x73, 0xcc, 0xe5, 0xd0, 0xd2, 0x2b, 0xd9},
+ {0xdf, 0x6a, 0x30, 0x1e, 0x95, 0xc9, 0x5d, 0xad, 0x97, 0xae, 0x0c, 0xc8, 0xc6, 0x91, 0x3b, 0xd8},
+ {0x80, 0x11, 0x89, 0x90, 0x2c, 0x85, 0x7f, 0x39, 0xe7, 0x35, 0x91, 0x28, 0x5e, 0x70, 0xb6, 0xdb},
+ {0xe6, 0x17, 0x34, 0x6a, 0xc9, 0xc2, 0x31, 0xbb, 0x36, 0x50, 0xae, 0x34, 0xcc, 0xca, 0x0c, 0x5b},
+ {0x27, 0xd9, 0x34, 0x37, 0xef, 0xb7, 0x21, 0xaa, 0x40, 0x18, 0x21, 0xdc, 0xec, 0x5a, 0xdf, 0x89},
+ {0x89, 0x23, 0x7d, 0x9d, 0xed, 0x9c, 0x5e, 0x78, 0xd8, 0xb1, 0xc9, 0xb1, 0x66, 0xcc, 0x73, 0x42},
+ {0x4a, 0x6d, 0x80, 0x91, 0xbf, 0x5e, 0x7d, 0x65, 0x11, 0x89, 0xfa, 0x94, 0xa2, 0x50, 0xb1, 0x4c},
+ {0x0e, 0x33, 0xf9, 0x60, 0x55, 0xe7, 0xae, 0x89, 0x3f, 0xfc, 0x0e, 0x3d, 0xcf, 0x49, 0x29, 0x02},
+ {0xe6, 0x1c, 0x43, 0x2b, 0x72, 0x0b, 0x19, 0xd1, 0x8e, 0xc8, 0xd8, 0x4b, 0xdc, 0x63, 0x15, 0x1b},
+ {0xf7, 0xe5, 0xae, 0xf5, 0x49, 0xf7, 0x82, 0xcf, 0x37, 0x90, 0x55, 0xa6, 0x08, 0x26, 0x9b, 0x16},
+ {0x43, 0x8d, 0x03, 0x0f, 0xd0, 0xb7, 0xa5, 0x4f, 0xa8, 0x37, 0xf2, 0xad, 0x20, 0x1a, 0x64, 0x03},
+ {0xa5, 0x90, 0xd3, 0xee, 0x4f, 0xbf, 0x04, 0xe3, 0x24, 0x7e, 0x0d, 0x27, 0xf2, 0x86, 0x42, 0x3f},
+ {0x5f, 0xe2, 0xc1, 0xa1, 0x72, 0xfe, 0x93, 0xc4, 0xb1, 0x5c, 0xd3, 0x7c, 0xae, 0xf9, 0xf5, 0x38},
+ {0x2c, 0x97, 0x32, 0x5c, 0xbd, 0x06, 0xb3, 0x6e, 0xb2, 0x13, 0x3d, 0xd0, 0x8b, 0x3a, 0x01, 0x7c},
+ {0x92, 0xc8, 0x14, 0x22, 0x7a, 0x6b, 0xca, 0x94, 0x9f, 0xf0, 0x65, 0x9f, 0x00, 0x2a, 0xd3, 0x9e},
+ {0xdc, 0xe8, 0x50, 0x11, 0x0b, 0xd8, 0x32, 0x8c, 0xfb, 0xd5, 0x08, 0x41, 0xd6, 0x91, 0x1d, 0x87},
+ {0x67, 0xf1, 0x49, 0x84, 0xc7, 0xda, 0x79, 0x12, 0x48, 0xe3, 0x2b, 0xb5, 0x92, 0x25, 0x83, 0xda},
+ {0x19, 0x38, 0xf2, 0xcf, 0x72, 0xd5, 0x4e, 0xe9, 0x7e, 0x94, 0x16, 0x6f, 0xa9, 0x1d, 0x2a, 0x36},
+ {0x74, 0x48, 0x1e, 0x96, 0x46, 0xed, 0x49, 0xfe, 0x0f, 0x62, 0x24, 0x30, 0x16, 0x04, 0x69, 0x8e},
+ {0x57, 0xfc, 0xa5, 0xde, 0x98, 0xa9, 0xd6, 0xd8, 0x00, 0x64, 0x38, 0xd0, 0x58, 0x3d, 0x8a, 0x1d},
+ {0x9f, 0xec, 0xde, 0x1c, 0xef, 0xdc, 0x1c, 0xbe, 0xd4, 0x76, 0x36, 0x74, 0xd9, 0x57, 0x53, 0x59},
+ {0xe3, 0x04, 0x0c, 0x00, 0xeb, 0x28, 0xf1, 0x53, 0x66, 0xca, 0x73, 0xcb, 0xd8, 0x72, 0xe7, 0x40},
+ {0x76, 0x97, 0x00, 0x9a, 0x6a, 0x83, 0x1d, 0xfe, 0xcc, 0xa9, 0x1c, 0x59, 0x93, 0x67, 0x0f, 0x7a},
+ {0x58, 0x53, 0x54, 0x23, 0x21, 0xf5, 0x67, 0xa0, 0x05, 0xd5, 0x47, 0xa4, 0xf0, 0x47, 0x59, 0xbd},
+ {0x51, 0x50, 0xd1, 0x77, 0x2f, 0x50, 0x83, 0x4a, 0x50, 0x3e, 0x06, 0x9a, 0x97, 0x3f, 0xbd, 0x7c},
+}
+
func TestSum64(t *testing.T) {
for i, v := range golden {
h := New(v.k)
@@ -167,6 +235,43 @@ func TestSum(t *testing.T) {
}
}
+// TestSumUnaligned checks the streaming 64-bit digest against goldenRef for
+// message lengths 0..63, with the message sliced out of a larger array at
+// every starting offset 1..7 (intended to exercise reads from slices that
+// do not begin at the start of the backing array).
+func TestSumUnaligned(t *testing.T) {
+	const align = 8
+	var k [16]byte
+	var in [64 + align]byte
+	for i := range k {
+		k[i] = byte(i)
+	}
+
+	for a := 1; a < align; a++ {
+		for i := 0; i < 64; i++ {
+			// Stage byte i for the next iteration; in[a:a+i] already
+			// holds 0..i-1 from the previous iterations at this offset.
+			in[a+i] = byte(i)
+			h := New(k[:])
+			h.Write(in[a : a+i])
+			if sum := h.Sum(nil); !bytes.Equal(sum, goldenRef[i]) {
+				// Report the offset as well as the length: the same
+				// length is tested at seven different offsets.
+				t.Errorf(`offset %d, len %d: expected "%x", got "%x"`, a, i, goldenRef[i], sum)
+			}
+		}
+	}
+}
+
+func TestSum128(t *testing.T) {
+ var k [16]byte
+ var in [64]byte
+ for i := range k {
+ k[i] = byte(i)
+ }
+
+ for i := 0; i < 64; i++ {
+ in[i] = byte(i)
+ h := New128(k[:])
+ h.Write(in[:i])
+ if sum := h.Sum(nil); !bytes.Equal(sum, goldenRef128[i]) {
+ t.Errorf(`%d: expected "%x", got "%x"`, i, goldenRef128[i], sum)
+ }
+ }
+}
+
func TestHash(t *testing.T) {
var k0, k1 uint64
for i, v := range golden {
@@ -194,10 +299,118 @@ func TestHash(t *testing.T) {
}
}
-var key = zeroKey
-var key0, key1 uint64
-var bench = New(key)
-var buf = make([]byte, 8<<10)
+// TestHashUnaligned checks the one-shot Hash function against goldenRef for
+// message lengths 0..63, with the message sliced out of a larger array at
+// every starting offset 1..7 (intended to exercise reads from slices that
+// do not begin at the start of the backing array).
+func TestHashUnaligned(t *testing.T) {
+	const align = 8
+	var k0, k1 uint64
+	var k [16]byte
+	var in [64 + align]byte
+
+	for i := range k {
+		k[i] = byte(i)
+	}
+	// The key halves are the little-endian words of the 16-byte key.
+	k0 = binary.LittleEndian.Uint64(k[0:8])
+	k1 = binary.LittleEndian.Uint64(k[8:16])
+
+	for a := 1; a < align; a++ {
+		for i := 0; i < 64; i++ {
+			// Stage byte i for the next iteration; in[a:a+i] already
+			// holds 0..i-1 from the previous iterations at this offset.
+			in[a+i] = byte(i)
+			ref := binary.LittleEndian.Uint64(goldenRef[i])
+			if sum := Hash(k0, k1, in[a:a+i]); sum != ref {
+				// Report the offset as well as the length: the same
+				// length is tested at seven different offsets.
+				t.Errorf(`offset %d, len %d: expected "%x", got "%x"`, a, i, ref, sum)
+			}
+		}
+	}
+}
+
+func TestHash128(t *testing.T) {
+ var k0, k1 uint64
+
+ var k [16]byte
+ var in [64]byte
+ for i := range k {
+ k[i] = byte(i)
+ }
+ k0 = binary.LittleEndian.Uint64(k[0:8])
+ k1 = binary.LittleEndian.Uint64(k[8:16])
+
+ for i := 0; i < 64; i++ {
+ in[i] = byte(i)
+ ref0 := binary.LittleEndian.Uint64(goldenRef128[i][0:])
+ ref1 := binary.LittleEndian.Uint64(goldenRef128[i][8:])
+ if sum0, sum1 := Hash128(k0, k1, in[:i]); sum0 != ref0 || sum1 != ref1 {
+ t.Errorf(`%d: expected "%x, %x", got "%x, %x"`, i, ref0, ref1, sum0, sum1)
+ }
+ }
+}
+
+func TestAlign(t *testing.T) {
+ data := "0076a9143219adce9b6f0a21fd53cb17e2fd9b2b4fac40b388ac"
+ k0 := uint64(316665572293978160)
+ k1 := uint64(8573005253291875333)
+
+ want := []uint64{
+ 16380727507974277821,
+ 16770526497674945769,
+ 11373998677292870540,
+ 10374222295991299613,
+ }
+ want128 := []uint64{
+ 14802151199638645495,
+ 13251497035884452880,
+ 7034723853391616289,
+ 16742813562040528752,
+ 10468120447644272532,
+ 10941274532208162335,
+ 11293904790559355408,
+ 15432350433573653068,
+ }
+
+ d, err := hex.DecodeString(data)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ var k [16]byte
+ binary.LittleEndian.PutUint64(k[0:], k0)
+ binary.LittleEndian.PutUint64(k[8:], k1)
+
+ for i := range want {
+ res := Hash(k0, k1, d[i:])
+ if res != want[i] {
+ t.Fatalf("Expected %v got %v", want[i], res)
+ }
+ reslo, reshi := Hash128(k0, k1, d[i:])
+ if reslo != want128[i*2] {
+ t.Fatalf("Expected %v got %v", want128[i*2], reslo)
+ }
+ if reshi != want128[i*2+1] {
+ t.Fatalf("Expected %v got %v", want128[i*2+1], reshi)
+ }
+ dig := newDigest(Size, k[:])
+ dig.Write(d[i:])
+ res = dig.Sum64()
+ if res != want[i] {
+ t.Fatalf("Expected %v got %v", want[i], res)
+ }
+ dig128 := newDigest(Size128, k[:])
+ dig128.Write(d[i:])
+ reslo, reshi = dig128.sum128()
+ if reslo != want128[i*2] {
+ t.Fatalf("Expected %v got %v", want128[i*2], reslo)
+ }
+ if reshi != want128[i*2+1] {
+ t.Fatalf("Expected %v got %v", want128[i*2+1], reshi)
+ }
+ }
+}
+
+var (
+ key = zeroKey
+ key0, key1 uint64
+ bench = New(key)
+ bench128 = New128(key)
+ buf = make([]byte, 8<<10)
+)
func BenchmarkHash8(b *testing.B) {
b.SetBytes(8)
@@ -241,6 +454,13 @@ func BenchmarkHash1K(b *testing.B) {
}
}
+func BenchmarkHash1Kunaligned(b *testing.B) {
+ b.SetBytes(1024)
+ for i := 0; i < b.N; i++ {
+ Hash(key0, key1, buf[1:1025])
+ }
+}
+
func BenchmarkHash8K(b *testing.B) {
b.SetBytes(int64(len(buf)))
for i := 0; i < b.N; i++ {
@@ -248,6 +468,55 @@ func BenchmarkHash8K(b *testing.B) {
}
}
+func BenchmarkHash128_8(b *testing.B) {
+ b.SetBytes(8)
+ for i := 0; i < b.N; i++ {
+ Hash128(key0, key1, buf[:8])
+ }
+}
+
+func BenchmarkHash128_16(b *testing.B) {
+ b.SetBytes(16)
+ for i := 0; i < b.N; i++ {
+ Hash128(key0, key1, buf[:16])
+ }
+}
+
+func BenchmarkHash128_40(b *testing.B) {
+ b.SetBytes(40)
+ for i := 0; i < b.N; i++ {
+ Hash128(key0, key1, buf[:40])
+ }
+}
+
+func BenchmarkHash128_64(b *testing.B) {
+ b.SetBytes(64)
+ for i := 0; i < b.N; i++ {
+ Hash128(key0, key1, buf[:64])
+ }
+}
+
+func BenchmarkHash128_128(b *testing.B) {
+ b.SetBytes(128)
+ for i := 0; i < b.N; i++ {
+ Hash128(key0, key1, buf[:128])
+ }
+}
+
+func BenchmarkHash128_1K(b *testing.B) {
+ b.SetBytes(1024)
+ for i := 0; i < b.N; i++ {
+ Hash128(key0, key1, buf[:1024])
+ }
+}
+
+func BenchmarkHash128_8K(b *testing.B) {
+ b.SetBytes(int64(len(buf)))
+ for i := 0; i < b.N; i++ {
+ Hash128(key0, key1, buf)
+ }
+}
+
func BenchmarkFull8(b *testing.B) {
b.SetBytes(8)
for i := 0; i < b.N; i++ {
@@ -302,6 +571,15 @@ func BenchmarkFull1K(b *testing.B) {
}
}
+func BenchmarkFull1Kunaligned(b *testing.B) {
+ b.SetBytes(1024)
+ for i := 0; i < b.N; i++ {
+ bench.Reset()
+ bench.Write(buf[1:1025])
+ bench.Sum64()
+ }
+}
+
func BenchmarkFull8K(b *testing.B) {
b.SetBytes(int64(len(buf)))
for i := 0; i < b.N; i++ {
@@ -310,3 +588,66 @@ func BenchmarkFull8K(b *testing.B) {
bench.Sum64()
}
}
+
+func BenchmarkFull128_8(b *testing.B) {
+ b.SetBytes(8)
+ for i := 0; i < b.N; i++ {
+ bench128.Reset()
+ bench128.Write(buf[:8])
+ bench128.Sum(nil)
+ }
+}
+
+func BenchmarkFull128_16(b *testing.B) {
+ b.SetBytes(16)
+ for i := 0; i < b.N; i++ {
+ bench128.Reset()
+ bench128.Write(buf[:16])
+ bench128.Sum(nil)
+ }
+}
+
+// BenchmarkFull128_40 measures a full Reset/Write/Sum cycle of the streaming
+// 128-bit digest over a 40-byte message. The byte count passed to SetBytes
+// must equal the number of bytes written, or the reported throughput is wrong.
+func BenchmarkFull128_40(b *testing.B) {
+	b.SetBytes(40)
+	for i := 0; i < b.N; i++ {
+		bench128.Reset()
+		bench128.Write(buf[:40])
+		bench128.Sum(nil)
+	}
+}
+
+func BenchmarkFull128_64(b *testing.B) {
+ b.SetBytes(64)
+ for i := 0; i < b.N; i++ {
+ bench128.Reset()
+ bench128.Write(buf[:64])
+ bench128.Sum(nil)
+ }
+}
+
+// BenchmarkFull128_128 measures a full Reset/Write/Sum cycle of the streaming
+// 128-bit digest over a 128-byte message. The byte count passed to SetBytes
+// must equal the number of bytes written, or the reported throughput is wrong.
+func BenchmarkFull128_128(b *testing.B) {
+	b.SetBytes(128)
+	for i := 0; i < b.N; i++ {
+		bench128.Reset()
+		bench128.Write(buf[:128])
+		bench128.Sum(nil)
+	}
+}
+
+func BenchmarkFull128_1K(b *testing.B) {
+ b.SetBytes(1024)
+ for i := 0; i < b.N; i++ {
+ bench128.Reset()
+ bench128.Write(buf[:1024])
+ bench128.Sum(nil)
+ }
+}
+
+func BenchmarkFull128_8K(b *testing.B) {
+ b.SetBytes(int64(len(buf)))
+ for i := 0; i < b.N; i++ {
+ bench128.Reset()
+ bench128.Write(buf)
+ bench128.Sum(nil)
+ }
+}