Codebase list tlsh / dc01c7a
Merge branch 'master' of gitlab.tw.trendnet.org:data-analytics-tlsh/tlsh Vic Hargrave 8 years ago
19 changed file(s) with 1001 addition(s) and 71 deletion(s). Raw diff Collapse all Expand all
44 Testing/tmp
55 test/simple_unittest
66 test/tlsh_version
7 VERSION
8 include/version.h
1616 project(TLSH)
1717
1818 set(VERSION_MAJOR 3)
19 set(VERSION_MINOR 2)
20 set(VERSION_PATCH 1)
19 set(VERSION_MINOR 4)
20 set(VERSION_PATCH 0)
2121
2222 # TLSH uses only half the counting buckets.
2323 # It can use all the buckets now.
4242 # write a file with the VERSION information
4343 file(REMOVE VERSION)
4444 file(WRITE VERSION
45 "// This file is generated by cmake. Modify\n"
46 "// CMakeLists.txt to change the VERSION numbers\n"
4547 "TLSH version: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH} ${TLSH_HASH}, ${TLSH_CHECKSUM}\n")
4648
4749 file(REMOVE include/version.h)
22 =======================================
33
44 TLSH is a fuzzy matching library. Given a byte stream with a minimum length
5 of 512 bytes (and a minimum amount of randomness - see note in Python
5 of 256 bytes (and a minimum amount of randomness - see note in Python
66 extension below), TLSH generates a hash value which can be used for similarity
77 comparisons. Similar objects will have similar hash values which allows for
88 the detection of similar objects by comparing their hash values. Note that
2626 hash values to determine similarity. Run it with no parameters for detailed usage.
2727
2828 TLSH has been ported to Java, which can be found at https://github.com/triplecheck/TLSH
29 TLSH has been ported to JavaScript, which can be found in the js_ext directory.
2930
3031 =======================================
3132 Downloading TLSH
7576
7677 import tlsh
7778 tlsh.hash(data)
78 - note that the data must contain at least 512 bytes to generate a hash value and that
79 - note that the data must contain at least 256 bytes to generate a hash value and that
7980 it must have a certain amount of randomness.
80 For example, tlsh.hash(str(os.urandom(512))), should always generate a hash.
81 To get the hash value of a file, try tlsh.hash(open(file, 'rb').read())
81 For example, tlsh.hash(str(os.urandom(256))), should always generate a hash.
82 To get the hash value of a file, try tlsh.hash(open(file, 'rb').read())
8283
8384 tlsh.diff(h1, h2)
8485 tlsh.diffxlen(h1, h2)
169170 - Add -version flag to tlsh_unittest to get the version of the tlsh library.
170171 3.2.1 - Pickup fix to hash_py() in py_ext/tlshmodule.cpp
171172 (commit da5370bcfdd40dd6a33c877ee87fe3866188cf2d)
173 3.3.0 - Made the minimum data length = 256 for the C version
174 3.3.1 - Fixed bug introduced by commit 1a8f1c581c8b988ced683ff8e0a0f9c574058df4
175 which caused a different hash value to be generated if there were multiple
176 calls to Tlsh::update as opposed to a single call to Tlsh::update.
177 3.4.0 - Add javascript implementation (see directory js_ext) - required for
178 Blackhat presentation - https://www.blackhat.com/us-15/speakers/Sean-Park.html
0 str1 = 'This is a test for Lili Diao. This is a string. Hello Hello Hello OPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQ'
1 str2 = 'This is a test for Jon Oliver. This is a string. Hello Hello Hello PQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHI'
2 hash1 = 09F05A198CC69A5A4F0F9380A9EE93F2B927CF42089EA74276DC5F0BB2D34E68114448
3 hash2 = 301124198C869A5A4F0F9380A9AE92F2B9278F42089EA34272885F0FB2D34E6911444C
4 difference (same strings) = 0
5 difference (with len) = 121
6 difference (without len) = 97
7 Testing Tlsh with multiple update calls
8 hash3 = 09F05A198CC69A5A4F0F9380A9EE93F2B927CF42089EA74276DC5F0BB2D34E68114448
9 hash4 = 301124198C869A5A4F0F9380A9AE92F2B9278F42089EA34272885F0FB2D34E6911444C
10 Testing Tlsh.fromTlshStr()
11 Recreating tlsh3 from 09F05A198CC69A5A4F0F9380A9EE93F2B927CF42089EA74276DC5F0BB2D34E68114448
12 hash3 = 09F05A198CC69A5A4F0F9380A9EE93F2B927CF42089EA74276DC5F0BB2D34E68114448
13 Recreating tlsh4 from 301124198C869A5A4F0F9380A9AE92F2B9278F42089EA34272885F0FB2D34E6911444C
14 hash4 = 301124198C869A5A4F0F9380A9AE92F2B9278F42089EA34272885F0FB2D34E6911444C
15 difference (same strings) = 0
16 difference (with len) = 121
17 difference (without len) = 97
1313 if test ! -f ../bin/tlsh_unittest
1414 then
1515 echoerr "error: (127), you must compile tlsh_unittest"
16 popd > /dev/null
17 exit 127
18 fi
19
20 if test ! -f ../test/simple_unittest
21 then
22 echoerr "error: (127), you must compile ../test/simple_unittest"
1623 popd > /dev/null
1724 exit 127
1825 fi
177184 runit
178185 runit "-xlen"
179186
# Run the stand-alone simple_unittest binary and compare its output with the
# expected transcript (whitespace differences are ignored).
echo "Running simple_unittest"
# $TMP is quoted so that a temp directory containing spaces does not split
# into several arguments.
../test/simple_unittest > "$TMP/simple_unittest.out"
if ! diff --ignore-all-space "$TMP/simple_unittest.out" exp/simple_unittest_EXP > /dev/null 2>&1; then
    echoerr "error: diff $TMP/simple_unittest.out exp/simple_unittest_EXP"
    popd > /dev/null
    # "exit -1" is not a valid POSIX exit status; bash reports it as 255, so
    # spell that value out explicitly.
    exit 255
fi
195
196 echo "passed"
197
180198 popd > /dev/null
+0
-1
VERSION less more
0 TLSH version: 3.2.1 compact hash, 1 byte checksum
00 #!/bin/sh
11
2 echo "rm -rf bin build lib Testing/tmp test/simple_unittest test/tlsh_version"
3 rm -rf bin build lib Testing/tmp test/simple_unittest test/tlsh_version
2 echo "rm -rf bin build lib Testing/tmp test/simple_unittest test/tlsh_version test/tlsh_unittest"
3 rm -rf bin build lib Testing/tmp test/simple_unittest test/tlsh_version test/tlsh_unittest
2525 #ifdef __cplusplus
2626
2727 #include "tlsh_impl.h"
28
29 // changed the minimum data length to 256 for version 3.3
30 #define MIN_DATA_LENGTH 256
2831
2932 class TLSH_API Tlsh{
3033
6969
7070 private:
7171 unsigned int *a_bucket;
72 unsigned char slide_window[SLIDING_WND_SIZE];
7273 unsigned int data_len;
7374
7475 struct lsh_bin_struct {
+0
-10
include/version.h less more
0 /****************************************************
1 * This file is generated by cmake. Modify the top
2 * level CMakeLists.txt to change the VERSION numbers
3 ****************************************************/
4
5 #define VERSION_MAJOR 3
6 #define VERSION_MINOR 2
7 #define VERSION_PATCH 1
8 #define TLSH_HASH "compact hash"
9 #define TLSH_CHECKSUM "1 byte checksum"
0 <!DOCTYPE html>
1 <html>
2 <head>
3 </head>
4 <body>
5
6 <p>TLSH simpletest demo:</p>
7 <p id="tlsh_simple_test"></p>
8
9 <script src="tlsh.js"> </script>
10
11
12 <script type="text/javascript">
13
14 /*
15 * Tester for tlsh.js.
16 * Output to correspond to output from C++ executable, simple_test
17 */
18
19 var debug = false;
20 document.getElementById("tlsh_simple_test").innerHTML = "Output to correspond to output from the C++ executable, simple_test";
21 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
22 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
23
24 /***************************************************************************************************
25 * Create str1 from 2 substrings
26 */
27 var str_1_a = "This is a test for Lili Diao. This is a string. Hello Hello Hello ";
28 var str_1_b = "OPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQ";
29 var str_1 = str_1_a + str_1_b;
30 debug && console.log("str1 = '"+str_1+"'");
31 document.getElementById("tlsh_simple_test").innerHTML += "str1 = '" + str_1 + "'";
32 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
33
34 /***************************************************************************************************
35 * Create Tlsh from str1
36 */
37 var tlsh1 = new Tlsh();
38 tlsh1.update(str_1, str_1.length+1); // str_1 includes NULL byte at end in simple_test.cpp, so add 1 to the length
39 tlsh1.finale();
40
41 /***************************************************************************************************
42 * Create str2 from 2 substrings
43 */
44 var str_2_a = "This is a test for Jon Oliver. This is a string. Hello Hello Hello ";
45 var str_2_b = "PQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHI";
46 var str_2 = str_2_a + str_2_b;
47 debug && console.log("str2 = '"+str_2+"'");
48 document.getElementById("tlsh_simple_test").innerHTML += "str2 = '" + str_2 + "'";
49 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
50
51 /***************************************************************************************************
52 * Create Tlsh from str2
53 */
54 var tlsh2 = new Tlsh();
55 tlsh2.update(str_2, str_2.length+1); // str_2 includes NULL byte at end in simple_test.cpp, so add 1 to the length
56 tlsh2.finale();
57 var hash2 = tlsh2.hash();
58
59 /***************************************************************************************************
60 * Get differences (with and without length) and output.
61 */
62 var hash1 = tlsh1.hash();
63 var diff_same_str = tlsh1.totalDiff(tlsh1);
64 var diff_with_len = tlsh1.totalDiff(tlsh2);
65 var diff_without_len = tlsh1.totalDiff(tlsh2, false);
66
67 debug && console.log("hash1 = "+hash1);
68 debug && console.log("hash2 = "+hash2);
69 debug && console.log("difference (same strings) = " + diff_same_str);
70 debug && console.log("difference (with len) = " + diff_with_len);
71 debug && console.log("difference (without len) = " + diff_without_len);
72
73 document.getElementById("tlsh_simple_test").innerHTML += "hash1 = " + hash1;
74 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
75 document.getElementById("tlsh_simple_test").innerHTML += "hash2 = " + hash2;
76 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
77 document.getElementById("tlsh_simple_test").innerHTML += "difference (same strings) = " + diff_same_str;
78 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
79 document.getElementById("tlsh_simple_test").innerHTML += "difference (with len) = " + diff_with_len;
80 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
81 document.getElementById("tlsh_simple_test").innerHTML += "difference (without len) = " + diff_without_len;
82 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
83
/***************************************************************************************************
 * Create Tlsh from substrings of str1 and verify that the hash values are the same.
 * The second update() passes length+1 so that the trailing NUL byte is fed as well,
 * matching the single-call tlsh1 above.
 */
debug && console.log("Testing Tlsh with multiple update calls");
document.getElementById("tlsh_simple_test").innerHTML += "Testing Tlsh with multiple update calls";
document.getElementById("tlsh_simple_test").innerHTML += "<br>";
var tlsh3 = new Tlsh();
tlsh3.update(str_1_a);
tlsh3.update(str_1_b, str_1_b.length+1);
tlsh3.finale();
var hash3 = tlsh3.hash();
if (hash1 != hash3) {
    document.getElementById("tlsh_simple_test").innerHTML = "ERROR: tlsh1 != tlsh3";
    throw("ERROR: tlsh1 != tlsh3");
}
// Log hash3 here; hash4 has not been computed yet at this point in the script.
debug && console.log("hash3 = " + hash3);
document.getElementById("tlsh_simple_test").innerHTML += "hash3 = " + hash3;
document.getElementById("tlsh_simple_test").innerHTML += "<br>";
102
/***************************************************************************************************
 * Build tlsh4 from the two halves of str2 (first half via update(), second half plus the
 * trailing NUL byte via finale()) and check that it hashes to the same value as tlsh2.
 */
var tlsh4 = new Tlsh();
tlsh4.update(str_2_a);
tlsh4.finale(str_2_b, str_2_b.length + 1);
var hash4 = tlsh4.hash();
if (hash2 != hash4) {
    var tlsh4_mismatch = "ERROR: tlsh2 != tlsh4";
    document.getElementById("tlsh_simple_test").innerHTML = tlsh4_mismatch;
    throw(tlsh4_mismatch);
}
debug && console.log("hash4 = " + hash4);
document.getElementById("tlsh_simple_test").innerHTML += "hash4 = " + hash4 + "<br>";
117
/***************************************************************************************************
 * Recreate tlsh3 and tlsh4 from the hash strings hash1 and hash2 via fromTlshStr() and verify
 * that each recreated object hashes back to the string it was built from.
 */
debug && console.log("Testing Tlsh.fromTlshStr()");
debug && console.log("Recreating tlsh3 from "+hash1);
document.getElementById("tlsh_simple_test").innerHTML += "Testing Tlsh.fromTlshStr()";
document.getElementById("tlsh_simple_test").innerHTML += "<br>";
document.getElementById("tlsh_simple_test").innerHTML += "Recreating tlsh3 from " + hash1;
document.getElementById("tlsh_simple_test").innerHTML += "<br>";

tlsh3.reset();
tlsh3.fromTlshStr(hash1);
hash3 = tlsh3.hash();
if (hash1 != hash3) {
    document.getElementById("tlsh_simple_test").innerHTML = "ERROR: hash1 != tlsh3.hash()";
    throw("ERROR: hash1 != tlsh3.hash()");
}

debug && console.log("hash3 = " + hash3);
document.getElementById("tlsh_simple_test").innerHTML += "hash3 = " + hash3;
document.getElementById("tlsh_simple_test").innerHTML += "<br>";

// tlsh4 is recreated from hash2, so that is what the log line must report.
debug && console.log("Recreating tlsh4 from "+hash2);
document.getElementById("tlsh_simple_test").innerHTML += "Recreating tlsh4 from " + hash2;
document.getElementById("tlsh_simple_test").innerHTML += "<br>";

tlsh4.reset();
tlsh4.fromTlshStr(hash2);
// Re-read the hash from the recreated object; otherwise the value printed below is the
// stale one computed before reset() and the round trip is never actually checked.
hash4 = tlsh4.hash();
if (hash2 != hash4) {
    document.getElementById("tlsh_simple_test").innerHTML = "ERROR: hash2 != tlsh4.hash()";
    throw("ERROR: hash2 != tlsh4.hash()");
}
debug && console.log("hash4 = " + hash4);
document.getElementById("tlsh_simple_test").innerHTML += "hash4 = " + hash4;
document.getElementById("tlsh_simple_test").innerHTML += "<br>";
149
150 diff_with_len = tlsh3.totalDiff(tlsh4);
151 diff_without_len = tlsh3.totalDiff(tlsh4, false);
152 diff_same_str = tlsh3.totalDiff(tlsh3);
153 debug && console.log("difference (same strings) = " + diff_same_str);
154 debug && console.log("difference (with len) = " + diff_with_len);
155 debug && console.log("difference (without len) = " + diff_without_len);
156
157 document.getElementById("tlsh_simple_test").innerHTML += "difference (same strings) = " + diff_same_str;
158 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
159 document.getElementById("tlsh_simple_test").innerHTML += "difference (with len) = " + diff_with_len;
160 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
161 document.getElementById("tlsh_simple_test").innerHTML += "difference (without len) = " + diff_without_len;
162 document.getElementById("tlsh_simple_test").innerHTML += "<br>";
163
164 </script>
165 </body>
166 </html>
167
0 /*
1 * Copyright 2013 Trend Micro Incorporated
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 /*
17 * Port of C++ implementation tlsh to javascript.
18 *
19 * Construct Tlsh object with methods:
20 * update
21 * finale
22 * fromTlshStr
23 * reset
24 * hash
25 * totalDiff
26 *
27 * See tlsh.html for example use.
28 */
29
30 var debug = false;
31 ///////////////////////////////////////////////////////////////////////////////////
32 // From tlsh_util.cpp
33 var v_table = new Uint8Array([
34 1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163,
35 14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200,
36 110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
37 25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235,
38 97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248,
39 174, 169, 211, 58, 66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
40 132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219,
41 119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10,
42 138, 30, 48, 183, 156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
43 170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131,
44 125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123,
45 118, 73, 2, 157, 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
46 27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203,
47 233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76,
48 140, 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
49 51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209]);
50
// Chain four values through v_table: each lookup index is the previous lookup
// result XORed with the next input (the chain starts from 0).
// Returns the final table entry.
function b_mapping(salt, i, j, k)
{
    var after_salt = v_table[0 ^ salt];
    var after_i    = v_table[after_salt ^ i];
    var after_j    = v_table[after_i ^ j];
    return v_table[after_j ^ k];
}
61
// Divisors used by l_capturing(), one per length range.
var LOG_1_5 = 0.4054651;
var LOG_1_3 = 0.26236426;
var LOG_1_1 = 0.095310180;

// Map a data length to a one-byte value. Three length ranges each use their own
// divisor and offset on the natural log of len; the floored result is masked to 8 bits.
function l_capturing(len) {
    var log_len = Math.log(len);
    var scaled = ( len <= 656 )  ? log_len / LOG_1_5
               : ( len <= 3199 ) ? log_len / LOG_1_3 - 8.72777
               :                   log_len / LOG_1_1 - 62.5472;

    return Math.floor(scaled) & 0xFF;
}
78
// Exchange the high and low 4-bit halves of the low byte of i.
function swap_byte( i )
{
    var high_nibble = (i & 0xF0) >> 4;
    var low_nibble  = i & 0x0F;
    return (low_nibble << 4) | high_nibble;
}
86
// Render the first len entries of data as upper-case hexadecimal, two characters per
// entry (values below 16 get a leading "0").
//   data - indexable collection of numbers (e.g. a Uint8Array)
//   len  - number of entries to convert
// Returns a primitive string; for len == 0 this is "" rather than a String wrapper
// object, so the result compares and tests for emptiness like any other string.
function to_hex( data, len )
{
    var hex = "";
    for (var i = 0; i < len; i++) {
        debug && console.log("to_hex: "+data[i]);
        if (data[i] < 16) {
            hex += "0";
        }
        hex += data[i].toString(16).toUpperCase();
    }

    return hex;
}
101
// Decode a hexadecimal string into bytes: every two characters become one
// entry of the returned Uint8Array.
function from_hex( str )
{
    var bytes = new Uint8Array(str.length / 2);
    for (var pos = 0, out = 0; pos < str.length; pos += 2, out++) {
        var pair = str.slice(pos, pos + 2);
        bytes[out] = parseInt(pair, 16);
    }
    return bytes;
}
111
// Distance between x and y on a ring of R values: the smaller of the direct
// difference and the difference obtained by wrapping around R.
function mod_diff(x, y, R)
{
    var upper = ( y > x ) ? y : x;
    var lower = ( y > x ) ? x : y;

    var direct  = upper - lower;
    var wrapped = lower + R - upper;

    if ( direct > wrapped ) {
        return wrapped;
    }
    return direct;
}
125
126 // Use generateTable() from TLSH.java implementation
// Build the 256 x 256 lookup table used by h_distance(). Entry [i][j] is the sum,
// over the four 2-bit fields of the bytes i and j, of the absolute difference of the
// fields, where a difference of 3 is counted as 6.
function generateTable()
{
    var size = 256;
    var table = new Array(size);

    for (var row = 0; row < size; row++) {
        table[row] = new Uint8Array(size);

        for (var col = 0; col < size; col++) {
            var a = row;
            var b = col;
            var total = 0;

            // Walk the four bit pairs from least to most significant.
            for (var pair = 0; pair < 4; pair++) {
                var delta = Math.abs((a & 3) - (b & 3));
                total += (delta == 3) ? 6 : delta;
                a >>= 2;
                b >>= 2;
            }
            table[row][col] = total;
        }
    }
    return table;
}
157
158 var bit_pairs_diff_table = generateTable();
159
// Sum bit_pairs_diff_table[x[n]][y[n]] over the first len positions of x and y.
function h_distance( len, x, y)
{
    var total = 0;
    var pos = 0;
    while ( pos < len ) {
        var cost = bit_pairs_diff_table[ x[pos] ][ y[pos] ];
        debug && console.log("bit_pairs_diff_table["+x[pos]+"]["+y[pos]+"]="+cost);
        total += cost;
        pos++;
    }
    debug && console.log("h_distance returning "+total);
    return total;
}
170
171 ///////////////////////////////////////////////////////////////////////////////////
172 // from C #defines in tlsh_impl.h and tlsh_impl.cpp
var SLIDING_WND_SIZE = 5;                // number of bytes kept in the sliding window
var RNG_SIZE = SLIDING_WND_SIZE;         // the window is used as a ring of this size

// Wrap an index into the ring; adding RNG_SIZE first keeps i >= -RNG_SIZE non-negative.
function RNG_IDX(i)
{
    return (i + RNG_SIZE) % RNG_SIZE;
}

var TLSH_CHECKSUM_LEN = 1;               // checksum bytes
var BUCKETS = 256;                       // size of the a_bucket counter array
var EFF_BUCKETS = 128;                   // buckets that take part in quartiles and the code
var CODE_SIZE = 32;                      // 128 * 2 bits = 32 bytes
// (1 checksum + 1 Lvalue + 1 Q + 32 code bytes) * 2 hexadecimal chars per byte = 70
var TLSH_STRING_LEN = 70;
var RANGE_LVALUE = 256;                  // ring size used when comparing Lvalue
var RANGE_QRATIO = 16;                   // ring size used when comparing the Q nibbles
183
// Exchange the entries at positions x and y of buf.bucket_copy in place.
function SWAP_UINT(buf, x, y)
{
    var cells = buf.bucket_copy;
    var held = cells[x];
    cells[x] = cells[y];
    cells[y] = held;
}
190
191 ///////////////////////////////////////////////////////////////////////////////////
192 // TLSH member and non-member functions - from tlsh_impl.cpp
193
// Partition buf.bucket_copy[left..right] in place around the value found at the middle
// index. On return, entries smaller than that value sit before the returned index and
// the value itself sits at the returned index.
// Ranges of one element return left unchanged; ranges of two elements are put in
// ascending order and also return left.
function partition(buf, left, right)
{
    var cells = buf.bucket_copy;
    var held;

    if( left == right ) {
        return left;
    }

    if( left+1 == right ) {
        if( cells[left] > cells[right] ) {
            held = cells[left];
            cells[left] = cells[right];
            cells[right] = held;
        }
        return left;
    }

    // Park the pivot value at the right end while the rest of the range is scanned.
    var middle = (left + right)>>1;
    var pivot_value = cells[middle];
    cells[middle] = cells[right];
    cells[right] = pivot_value;

    var boundary = left;
    for( var scan = left; scan < right; scan++ ) {
        if( cells[scan] < pivot_value ) {
            held = cells[boundary];
            cells[boundary] = cells[scan];
            cells[scan] = held;
            boundary++;
        }
    }

    // Move the pivot value into its final slot.
    cells[right] = cells[boundary];
    cells[boundary] = pivot_value;

    return boundary;
}
225
// Find the values at the quarter, half and three-quarter positions of the first
// EFF_BUCKETS entries of tlsh.a_bucket and store them in quartiles.q1, quartiles.q2
// and quartiles.q3. Works on a private copy, so tlsh.a_bucket is left untouched.
//
// The half position is located first by repeated partition() calls; every pivot index
// produced on the way is remembered so that the searches for the other two positions
// can start from an already narrowed range.
function find_quartile(tlsh, quartiles)
{
    var last = EFF_BUCKETS-1;
    var p1 = EFF_BUCKETS/4-1;
    var p2 = EFF_BUCKETS/2-1;
    var p3 = EFF_BUCKETS-EFF_BUCKETS/4-1;

    var buf = new Object();
    buf.bucket_copy = new Uint32Array(EFF_BUCKETS);
    for( var n = 0; n <= last; n++ ) {
        buf.bucket_copy[n] = tlsh.a_bucket[n];
    }

    // Pivot indices seen below / above p2 while searching for the half position.
    var pivots_below = new Uint32Array(EFF_BUCKETS);
    var pivots_above = new Uint32Array(EFF_BUCKETS);
    var n_below = 0;
    var n_above = 0;

    // Narrow [lo, hi] with partition() until a pivot lands exactly on index k,
    // then return the value stored there.
    function select(lo, hi, k)
    {
        for( ; ; ) {
            var at = partition( buf, lo, hi );
            if( at > k ) {
                hi = at-1;
            } else if( at < k ) {
                lo = at+1;
            } else {
                return buf.bucket_copy[k];
            }
        }
    }

    // Half position (q2), recording the pivots on each side.
    var lo = 0;
    var hi = last;
    while( true ) {
        var at = partition( buf, lo, hi );
        if( at > p2 ) {
            hi = at - 1;
            pivots_above[n_above] = at;
            n_above++;
        } else if( at < p2 ) {
            lo = at + 1;
            pivots_below[n_below] = at;
            n_below++;
        } else {
            quartiles.q2 = buf.bucket_copy[p2];
            break;
        }
    }

    pivots_below[n_below] = p2-1;
    pivots_above[n_above] = p2+1;

    // Quarter position (q1): step through the recorded lower pivots until one
    // reaches or passes p1.
    lo = 0;
    for( var a = 0; a <= n_below; a++ ) {
        var upper = pivots_below[a];
        if( upper > p1 ) {
            quartiles.q1 = select( lo, upper, p1 );
            break;
        }
        if( upper < p1 ) {
            lo = upper;
            continue;
        }
        quartiles.q1 = buf.bucket_copy[p1];
        break;
    }

    // Three-quarter position (q3): the mirror image, using the recorded upper pivots.
    hi = last;
    for( var b = 0; b <= n_above; b++ ) {
        var lower = pivots_above[b];
        if( lower < p3 ) {
            quartiles.q3 = select( lower, hi, p3 );
            break;
        }
        if( lower > p3 ) {
            hi = lower;
            continue;
        }
        quartiles.q3 = buf.bucket_copy[p3];
        break;
    }
}
308
309 ///////////////////////////////////////////////////////////////////////////////////
310 // Definition of tlsh object
// Constructor: creates an empty Tlsh object with all counters zeroed and no valid hash.
var Tlsh = function ()
{
    // --- state accumulated by update() ---
    this.checksum = new Uint8Array(TLSH_CHECKSUM_LEN);      // unsigned char array
    this.slide_window = new Uint8Array(SLIDING_WND_SIZE);   // most recent input bytes
    this.a_bucket = new Uint32Array(BUCKETS);               // unsigned int array
    this.data_len = 0;                                      // bytes fed so far

    // --- digest fields filled in by finale() / fromTlshStr() ---
    this.tmp_code = new Uint8Array(CODE_SIZE);
    this.Lvalue = 0;
    this.Q = 0;

    // --- hex string cached by hash(), and whether a digest is available ---
    this.lsh_code = new String();
    this.lsh_code_valid = false;
};
323
324 // Use get/setQLo() and get/setQHi() from TLSH.java implementation
// Low 4 bits of Q.
function getQLo(Q)
{
    var low_nibble = Q & 0x0F;
    return low_nibble;
}
329
// Bits 4..7 of Q, shifted down into the range 0..15.
function getQHi(Q)
{
    return (Q >> 4) & 0x0F;
}
334
// Return Q's bits 4..7 combined with the low 4 bits of x as the new low nibble.
function setQLo(Q, x)
{
    var kept_high = Q & 0xF0;
    var new_low = x & 0x0F;
    return kept_high | new_low;
}
339
// Return Q's low 4 bits combined with the low 4 bits of x placed in bits 4..7.
function setQHi(Q, x)
{
    var kept_low = Q & 0x0F;
    var new_high = (x & 0x0F) << 4;
    return kept_low | new_high;
}
344
// Feed more input into the hash state.
//
//   str    - input data; every character code must be <= 255
//   length - optional number of bytes to consume, defaults to str.length. Callers may
//            pass a larger value: charCodeAt() returns NaN past the end of str and
//            NaN & 0xff is 0, so str.length+1 appends a trailing NUL byte (this is how
//            the strings in simple_test.cpp are matched).
//
// If a character code above 255 is found, or length does not match the number of bytes
// collected, the problem is reported with alert() and the object is left unchanged.
// May be called repeatedly; data_len carries the position across calls.
Tlsh.prototype.update = function (str, length)
{
    length = typeof length !== 'undefined' ? length : str.length;

    // Convert and validate everything first so that bad input never leaves the
    // buckets half updated.
    var data = [];
    for(var i = 0; i < length; i++) {
        var code = str.charCodeAt(i);
        if (code > 255) {
            alert("Unexpected " + str[i] + " has value " + code + " which is too large");
            return;
        }
        data.push(code & 0xff);
    }

    if (length != data.length)
    {
        alert("Unexpected string length:" + length + " is not equal to value unsigned char length: " + data.length);
        return;
    }

    // j is the slot of the ring buffer that receives the next byte.
    var j = this.data_len % RNG_SIZE;
    var fed_len = this.data_len;

    for( var i=0; i<length; i++, fed_len++, j=RNG_IDX(j+1) ) {
        this.slide_window[j] = data[i];
        debug && console.log("slide_window["+j+"]="+this.slide_window[j]);

        if ( fed_len >= 4 ) {
            // only calculate once the window holds 5 bytes
            var j_1 = RNG_IDX(j-1);
            var j_2 = RNG_IDX(j-2);
            var j_3 = RNG_IDX(j-3);
            var j_4 = RNG_IDX(j-4);

            for (var k = 0; k < TLSH_CHECKSUM_LEN; k++) {
                if (k == 0) {
                    this.checksum[k] = b_mapping(0, this.slide_window[j], this.slide_window[j_1], this.checksum[k]);
                    debug && console.log("tlsh.checksum["+k+"]="+this.checksum[k]);
                }
                else {
                    // later checksum bytes are salted with the previous checksum byte
                    this.checksum[k] = b_mapping(this.checksum[k-1], this.slide_window[j], this.slide_window[j_1], this.checksum[k]);
                }
            }

            // Six triplets drawn from the window, each with its own salt, each
            // incrementing one bucket. (The salt-2 mapping used to be evaluated
            // three times in a row with identical arguments; once is enough.)
            var r;
            r = b_mapping(2, this.slide_window[j], this.slide_window[j_1], this.slide_window[j_2]);
            this.a_bucket[r]++;
            r = b_mapping(3, this.slide_window[j], this.slide_window[j_1], this.slide_window[j_3]);
            this.a_bucket[r]++;
            r = b_mapping(5, this.slide_window[j], this.slide_window[j_2], this.slide_window[j_3]);
            this.a_bucket[r]++;
            r = b_mapping(7, this.slide_window[j], this.slide_window[j_2], this.slide_window[j_4]);
            this.a_bucket[r]++;
            r = b_mapping(11, this.slide_window[j], this.slide_window[j_1], this.slide_window[j_4]);
            this.a_bucket[r]++;
            r = b_mapping(13, this.slide_window[j], this.slide_window[j_3], this.slide_window[j_4]);
            this.a_bucket[r]++;
        }
    }
    this.data_len += length;
}
417
// Finish hashing and compute the digest fields (tmp_code, Lvalue, Q).
// Named "finale" because "final" is a reserved word.
//
//   str, length - optional last chunk of data; when str is given it is passed to
//                 update() before the digest is computed.
//
// The input must total at least 256 bytes and more than half of the first
// 4*CODE_SIZE buckets must be non-zero. If either condition fails, the problem is
// reported with alert() and the function returns WITHOUT marking the digest valid,
// so a later hash() call returns its error string instead of a digest built from
// unsuitable data.
Tlsh.prototype.finale = function (str, length)
{
    if (typeof str !== 'undefined') {
        this.update(str, length);
    }

    // incoming data must be at least 256 bytes
    if (this.data_len < 256) {
        alert("ERROR: length too small - " + this.data_len); // + ")");
        return;
    }

    // buckets must be more than 50% non-zero
    var nonzero = 0;
    for(var i=0; i<CODE_SIZE; i++) {
        for(var j=0; j<4; j++) {
            if (this.a_bucket[4*i + j] > 0) {
                nonzero++;
            }
        }
    }
    if (nonzero <= 4*CODE_SIZE/2) {
        alert("ERROR: not enought variation in input - " + nonzero + " < " + 4*CODE_SIZE/2);
        return;
    }

    var quartiles = new Object();
    quartiles.q1 = 0;
    quartiles.q2 = 0;
    quartiles.q3 = 0;
    find_quartile(this, quartiles);

    // Encode each bucket as 2 bits according to the quartile it falls in;
    // four buckets are packed into every code byte, lowest bucket in the lowest bits.
    for(var i=0; i<CODE_SIZE; i++) {
        var h=0;
        for(var j=0; j<4; j++) {
            var k = this.a_bucket[4*i + j];
            if( quartiles.q3 < k ) {
                h += 3 << (j*2);
            } else if( quartiles.q2 < k ) {
                h += 2 << (j*2);
            } else if( quartiles.q1 < k ) {
                h += 1 << (j*2);
            }
        }
        this.tmp_code[i] = h;
    }

    this.Lvalue = l_capturing(this.data_len);
    // setQLo/setQHi mask their argument with 0x0F, which also drops the fractional
    // part of the ratio.
    this.Q = setQLo(this.Q, ((quartiles.q1*100)/quartiles.q3) % 16);
    this.Q = setQHi(this.Q, ((quartiles.q2*100)/quartiles.q3) % 16);
    this.lsh_code_valid = true;
}
469
// Return the digest as an upper-case hexadecimal string and cache it in this.lsh_code.
// Layout: checksum byte(s), Lvalue, Q (each with its nibbles swapped), followed by
// the CODE_SIZE code bytes in reverse order.
// Returns "ERROR IN PROCESSING" when no valid digest is available.
Tlsh.prototype.hash = function ()
{
    if (this.lsh_code_valid == false) {
        return "ERROR IN PROCESSING";
    }

    var tmp = new Object();
    tmp.checksum = new Uint8Array(TLSH_CHECKSUM_LEN);
    tmp.Lvalue = 0;
    tmp.Q = 0;
    tmp.tmp_code = new Uint8Array(CODE_SIZE);

    for (var k = 0; k < TLSH_CHECKSUM_LEN; k++) {
        tmp.checksum[k] = swap_byte( this.checksum[k] );
        debug && console.log("After swap_byte for checksum: tmp.checksum:"+tmp.checksum[k]+", tlsh.checksum:"+this.checksum[k]);
    }
    tmp.Lvalue = swap_byte( this.Lvalue );
    tmp.Q = swap_byte( this.Q );
    debug && console.log("After swap_byte for Q: tmp.Q:"+tmp.Q+", tlsh.Q:"+this.Q);
    for( var i=0; i < CODE_SIZE; i++ ){
        tmp.tmp_code[i] = this.tmp_code[CODE_SIZE-1-i];
        debug && console.log("tmp.tmp_code["+i+"]:"+tmp.tmp_code[i]);
    }

    this.lsh_code = to_hex(tmp.checksum, TLSH_CHECKSUM_LEN);

    // One-element scratch array so single values can go through to_hex().
    // Declared with var: without it the assignment created a global variable.
    var tmpArray = new Uint8Array(1);
    tmpArray[0] = tmp.Lvalue;
    this.lsh_code = this.lsh_code.concat(to_hex(tmpArray, 1));

    tmpArray[0] = tmp.Q;
    this.lsh_code = this.lsh_code.concat(to_hex(tmpArray, 1));
    this.lsh_code = this.lsh_code.concat(to_hex(tmp.tmp_code, CODE_SIZE));
    return this.lsh_code;
}
505
// Put the object back into its freshly constructed state: fresh zeroed arrays, zero
// length, empty cached string and no valid digest. The constructor performs exactly
// these assignments, so it is re-run on the existing object.
Tlsh.prototype.reset = function ()
{
    Tlsh.call(this);
}
518
// Distance score between this digest and another one; 0 for the same object.
//
//   other    - the Tlsh object to compare with
//   len_diff - when true (the default) the Lvalue difference is included
//
// The score is the sum of: the Lvalue penalty (optional), a penalty for each Q nibble,
// 1 if the checksums differ, and h_distance() over the code bytes.
Tlsh.prototype.totalDiff = function(other, len_diff)
{
    if (this == other)
    {
        return 0;
    }

    if (typeof len_diff === 'undefined') {
        len_diff = true;
    }

    // Q nibble penalty: differences of 0 or 1 count as themselves, larger ones are
    // scaled after subtracting one.
    function q_penalty(qdiff)
    {
        return ( qdiff <= 1 ) ? qdiff : (qdiff-1)*12;
    }

    var score = 0;

    if (len_diff) {
        var ldiff = mod_diff( this.Lvalue, other.Lvalue, RANGE_LVALUE);
        // 0 and 1 count as themselves; anything else is scaled by 12.
        score = ( ldiff == 0 || ldiff == 1 ) ? ldiff : ldiff*12;
    }

    score += q_penalty( mod_diff( getQLo(this.Q), getQLo(other.Q), RANGE_QRATIO) );
    score += q_penalty( mod_diff( getQHi(this.Q), getQHi(other.Q), RANGE_QRATIO) );

    // Any differing checksum byte costs exactly one point in total.
    var checksum_differs = false;
    for (var k = 0; k < TLSH_CHECKSUM_LEN && !checksum_differs; k++) {
        checksum_differs = (this.checksum[k] != other.checksum[k]);
    }
    if (checksum_differs) {
        score++;
    }

    return score + h_distance( CODE_SIZE, this.tmp_code, other.tmp_code );
}
563
// Load this object's digest fields from a TLSH hex string.
//
//   str - hex string of exactly TLSH_STRING_LEN characters (0-9, A-F, a-f)
//
// On malformed input an error is reported and the object is left untouched.
// On success checksum, Lvalue, Q and tmp_code are filled in (the inverse of
// the layout produced by hash()) and lsh_code_valid is set to true.
// Nothing is returned in either case.
Tlsh.prototype.fromTlshStr = function(str)
{
    // alert() only exists in browser-like hosts; calling it elsewhere (e.g.
    // Node.js) would raise a ReferenceError instead of reporting the problem,
    // so fall back to console.error when it is not available.
    function report(message) {
        if (typeof alert === 'function') {
            alert(message);
        } else {
            console.error(message);
        }
    }

    if (str.length != TLSH_STRING_LEN) {
        report("Tlsh.fromTlshStr() - string has wrong length (" + str.length + " != " + TLSH_STRING_LEN + ")");
        return;
    }
    for (var i = 0; i < TLSH_STRING_LEN; i++) {
        var ch = str[i];
        var isHexDigit =
            (ch >= '0' && ch <= '9') ||
            (ch >= 'A' && ch <= 'F') ||
            (ch >= 'a' && ch <= 'f');
        if (!isHexDigit) {
            report("Tlsh.fromTlshStr() - string has invalid (non-hex) characters");
            return;
        }
    }

    var bytes = from_hex(str);

    // Fields are read in the order hash() writes them: checksum byte(s),
    // Lvalue, Q, then the body code. `pos` tracks the next unread byte.
    var pos = 0;
    for (var k = 0; k < TLSH_CHECKSUM_LEN; k++) {
        this.checksum[k] = swap_byte( bytes[pos++] );
    }
    this.Lvalue = swap_byte( bytes[pos++] );
    this.Q = swap_byte( bytes[pos++] );

    // The body code is stored back to front in the string.
    for (var j = 0; j < CODE_SIZE; j++) {
        this.tmp_code[j] = bytes[pos + CODE_SIZE - 1 - j];
    }
    this.lsh_code_valid = true;
}
0 #include <math.h>
1 #include <stdlib.h>
2 #include <stdio.h>
3
4 /////////////////////////////////////////////////////////////////////////////
5 // Tlsh.java code to generate the bit_pairs_diff_table in tlsh_util.cpp
6
// result[i][j] receives the distance between byte values i and j as computed
// by generateTable().
int result[256][256];

// Fill result[][] for every pair of byte values. Each byte is treated as four
// 2-bit fields; for each field the absolute difference is added to the total,
// except that a difference of 3 counts as 6.
void generateTable()
{
    for (int a = 0; a < 256; ++a) {
        for (int b = 0; b < 256; ++b) {
            int total = 0;
            int lhs = a;
            int rhs = b;
            // Peel off the 2-bit fields from least to most significant.
            for (int field = 0; field < 4; ++field) {
                const int gap = abs(lhs % 4 - rhs % 4);
                total += (gap == 3) ? 6 : gap;
                lhs /= 4;
                rhs /= 4;
            }
            result[a][b] = total;
        }
    }
}
25
26 /////////////////////////////////////////////////////////////////////////////
27 // Jon Oliver's functions to generate bit_pairs_diff_table
28
// Distance between two bit-pair values: the absolute difference when it is
// 0, 1 or 2, and 6 for any larger difference.
static int pairbit_diff(int pairb, int opairb)
{
    const int gap = abs(pairb - opairb);
    return (gap <= 2) ? gap : 6;
}
38
39 int byte_diff(unsigned char bv, unsigned char obv)
40 {
41 int h1 = (unsigned char) bv / 16;
42 int oh1 = (unsigned char) obv / 16;
43 int h2 = (unsigned char) bv % 16;
44 int oh2 = (unsigned char) obv % 16;
45 int p1 = h1 / 4;
46 int op1 = oh1 / 4;
47 int p2 = h1 % 4;
48 int op2 = oh1 % 4;
49 int p3 = h2 / 4;
50 int op3 = oh2 / 4;
51 int p4 = h2 % 4;
52 int op4 = oh2 % 4;
53 int diff = 0;
54 diff = diff + pairbit_diff(p1, op1);
55 diff = diff + pairbit_diff(p2, op2);
56 diff = diff + pairbit_diff(p3, op3);
57 diff = diff + pairbit_diff(p4, op4);
58 return(diff);
59 }
60
61 /////////////////////////////////////////////////////////////////////////////
62 // main() function to verify Tlsh.java and Jon's implementations are equalivant,
63 // and to output the static unsigned char bit_pairs_diff_table in
64 // tlsh_util.cpp.
65 int main()
66 {
67 int x;
68 int y;
69 generateTable();
70 for (x=0; x<256; x++) {
71 printf("{\n");
72 for (y=0; y<256; y++) {
73 int z = byte_diff((unsigned char) x, (unsigned char) y);
74 if (z != result[x][y]) {
75 printf("\nWARNING x=%d y=%d z=%d nuno=%d\n", x, y, z, result[x][y]);
76 return -1;
77 }
78 printf("%d", z);
79 if (y < 255)
80 printf(", ");
81 if (y % 16 == 15)
82 printf("\n");
83 }
84 printf("}");
85 if (x < 255)
86 printf(",");
87 printf("\n");
88 }
89 printf("};");
90 }
8383 if ( this == other )
8484 return 0;
8585 else
86 return (impl.totalDiff(other->impl, len_diff)+1);
86 return (impl.totalDiff(other->impl, len_diff));
8787 }
8888
8989 int Tlsh::fromTlshStr(const char* str)
4141
4242 TlshImpl::TlshImpl() : a_bucket(NULL), data_len(0), lsh_code(NULL), lsh_code_valid(false)
4343 {
44 memset(this->slide_window, 0, sizeof this->slide_window);
4445 memset(&this->lsh_bin, 0, sizeof this->lsh_bin);
4546 }
4647
5354 void TlshImpl::reset()
5455 {
5556 delete [] this->a_bucket; this->a_bucket = NULL;
57 memset(this->slide_window, 0, sizeof this->slide_window);
5658 delete [] this->lsh_code; this->lsh_code = NULL;
5759 memset(&this->lsh_bin, 0, sizeof this->lsh_bin);
5860 this->data_len = 0;
7274 memset(this->a_bucket, 0, sizeof(int)*BUCKETS);
7375 }
7476
75 unsigned char slide_window[SLIDING_WND_SIZE];
76 memset(slide_window, 0, sizeof(slide_window));
7777 for( unsigned int i=0; i<len; i++, fed_len++, j=RNG_IDX(j+1) ) {
78 slide_window[j] = data[i];
78 this->slide_window[j] = data[i];
7979
8080 if ( fed_len >= 4 ) {
8181 //only calculate when input >= 5 bytes
8686
8787 for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) {
8888 if (k == 0) {
89 this->lsh_bin.checksum[k] = b_mapping(0, slide_window[j], slide_window[j_1], this->lsh_bin.checksum[k]);
89 this->lsh_bin.checksum[k] = b_mapping(0, this->slide_window[j], this->slide_window[j_1], this->lsh_bin.checksum[k]);
9090 }
9191 else {
9292 // use calculated 1 byte checksums to expand the total checksum to 3 bytes
93 this->lsh_bin.checksum[k] = b_mapping(this->lsh_bin.checksum[k-1], slide_window[j], slide_window[j_1], this->lsh_bin.checksum[k]);
93 this->lsh_bin.checksum[k] = b_mapping(this->lsh_bin.checksum[k-1], this->slide_window[j], this->slide_window[j_1], this->lsh_bin.checksum[k]);
9494 }
9595 }
9696
9797 unsigned char r;
98 r = b_mapping(2, slide_window[j], slide_window[j_1], slide_window[j_2]);
99 this->a_bucket[r]++;
100 r = b_mapping(3, slide_window[j], slide_window[j_1], slide_window[j_3]);
101 this->a_bucket[r]++;
102 r = b_mapping(5, slide_window[j], slide_window[j_2], slide_window[j_3]);
103 this->a_bucket[r]++;
104 r = b_mapping(7, slide_window[j], slide_window[j_2], slide_window[j_4]);
105 this->a_bucket[r]++;
106 r = b_mapping(11, slide_window[j], slide_window[j_1], slide_window[j_4]);
107 this->a_bucket[r]++;
108 r = b_mapping(13, slide_window[j], slide_window[j_3], slide_window[j_4]);
98 r = b_mapping(2, this->slide_window[j], this->slide_window[j_1], this->slide_window[j_2]);
99 this->a_bucket[r]++;
100 r = b_mapping(3, this->slide_window[j], this->slide_window[j_1], this->slide_window[j_3]);
101 this->a_bucket[r]++;
102 r = b_mapping(5, this->slide_window[j], this->slide_window[j_2], this->slide_window[j_3]);
103 this->a_bucket[r]++;
104 r = b_mapping(7, this->slide_window[j], this->slide_window[j_2], this->slide_window[j_4]);
105 this->a_bucket[r]++;
106 r = b_mapping(11, this->slide_window[j], this->slide_window[j_1], this->slide_window[j_4]);
107 this->a_bucket[r]++;
108 r = b_mapping(13, this->slide_window[j], this->slide_window[j_3], this->slide_window[j_4]);
109109 this->a_bucket[r]++;
110110
111111 }
116116 /* to signal the class there is no more data to be added */
117117 void TlshImpl::final()
118118 {
119 // incoming data must more than or equal to 512 bytes
120 if (this->data_len < 512) {
119 // incoming data must more than or equal to MIN_DATA_LENGTH bytes
120 if (this->data_len < MIN_DATA_LENGTH) {
121121 // this->lsh_code be empty
122122 delete [] this->a_bucket; this->a_bucket = NULL;
123123 return;
175175 {
176176 return 1;
177177 }
178 this->reset();
179
180 lsh_bin_struct tmp;
181 from_hex( str, TLSH_STRING_LEN, (unsigned char*)&tmp );
182
183 // Reconstruct checksum, Qrations & lvalue
184 for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) {
185 this->lsh_bin.checksum[k] = swap_byte(tmp.checksum[k]);
186 }
187 this->lsh_bin.Lvalue = swap_byte( tmp.Lvalue );
188 this->lsh_bin.Q.QB = swap_byte(tmp.Q.QB);
189 for( int i=0; i < CODE_SIZE; i++ ){
190 this->lsh_bin.tmp_code[i] = (tmp.tmp_code[CODE_SIZE-1-i]);
191 }
192 this->lsh_code_valid = true;
193
194 return 0;
178
179 this->reset();
180
181 lsh_bin_struct tmp;
182 from_hex( str, TLSH_STRING_LEN, (unsigned char*)&tmp );
183
184 // Reconstruct checksum, Qrations & lvalue
185 for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) {
186 this->lsh_bin.checksum[k] = swap_byte(tmp.checksum[k]);
187 }
188 this->lsh_bin.Lvalue = swap_byte( tmp.Lvalue );
189 this->lsh_bin.Q.QB = swap_byte(tmp.Q.QB);
190 for( int i=0; i < CODE_SIZE; i++ ){
191 this->lsh_bin.tmp_code[i] = (tmp.tmp_code[CODE_SIZE-1-i]);
192 }
193 this->lsh_code_valid = true;
194
195 return 0;
195196 }
196197
197198 const char* TlshImpl::hash(char *buffer, unsigned int bufSize)
275276
276277 diff += h_distance( CODE_SIZE, this->lsh_bin.tmp_code, other.lsh_bin.tmp_code );
277278
278 return (diff - 1);
279 return (diff);
279280 }
280281
281282
3939 51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209
4040 };
4141
42 // Compile and run gen_arr2.cpp to generate bit_pairs_diff_table
4243 static unsigned char bit_pairs_diff_table[][256] = {
4344 {
4445 0, 1, 2, 6, 1, 2, 3, 7, 2, 3, 4, 8, 6, 7, 8, 12,
2121 set_target_properties(simple_unittest PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/test)
2222 set_target_properties(simple_unittest PROPERTIES OUTPUT_NAME simple_unittest${BUILD_POSTFIX})
2323
24 if(CMAKE_COMPILER_IS_GNUCXX)
25 add_executable(tlsh_unittest tlsh_unittest.cpp)
26 target_link_libraries(tlsh_unittest tlsh)
27 set_target_properties(tlsh_unittest PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin)
28 set_target_properties(tlsh_unittest PROPERTIES OUTPUT_NAME tlsh_unittest${BUILD_POSTFIX})
29 endif()
24 add_executable(tlsh_unittest tlsh_unittest.cpp)
25 target_link_libraries(tlsh_unittest tlsh)
26 set_target_properties(tlsh_unittest PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin)
27 set_target_properties(tlsh_unittest PROPERTIES OUTPUT_NAME tlsh_unittest${BUILD_POSTFIX})
2323 #include <stdio.h>
2424 #include <stdlib.h>
2525 #include <string.h>
26 #include <assert.h>
2627
2728 #include "tlsh.h"
2829
6162 printf("difference (same strings) = %d\n", t1.totalDiff(&t1) );
6263 printf("difference (with len) = %d\n", t1.totalDiff(&t2) );
6364 printf("difference (without len) = %d\n", t1.totalDiff(&t2, false) );
65
66 printf("Testing Tlsh with multiple update calls\n");
67 Tlsh t3, t4;
68 snprintf(minSizeBuffer1, sizeof(minSizeBuffer1), "%s", str1);
69 t3.update( (const unsigned char*) minSizeBuffer1, len1);
70 for (int i = 0; i < 511; i++) {
71 minSizeBuffer1[i] = i % 26 + 'A';
72 }
73 minSizeBuffer1[511] = 0;
74 t3.update( (const unsigned char*) minSizeBuffer1+len1, 512-len1);
75 t3.final();
76 assert(strcmp(t1.getHash(), t3.getHash()) == 0);
77
78 snprintf(minSizeBuffer2, sizeof(minSizeBuffer2), "%s", str2);
79 t4.update( (const unsigned char*) minSizeBuffer2, len2);
80 for (int i = 0; i < 1023; i++) {
81 minSizeBuffer2[i] = i % 26 + 'A';
82 }
83 minSizeBuffer1[1023] = 0;
84 t4.final( (const unsigned char*) minSizeBuffer2+len2, 1024-len2);
85 assert(strcmp(t2.getHash(), t4.getHash()) == 0);
86
87 printf("hash3 = %s\n", t3.getHash() );
88 printf("hash4 = %s\n", t4.getHash() );
89
90 printf("Testing Tlsh.fromTlshStr()\n");
91 printf("Recreating tlsh3 from %s\n", t1.getHash(minSizeBuffer1, sizeof(minSizeBuffer1)));
92 t3.reset();
93 t3.fromTlshStr(minSizeBuffer1);
94 printf("hash3 = %s\n", t3.getHash(minSizeBuffer2, sizeof(minSizeBuffer2)));
95 assert(strcmp(minSizeBuffer1, minSizeBuffer2) == 0);
96
97 printf("Recreating tlsh4 from %s\n", t2.getHash(minSizeBuffer1, sizeof(minSizeBuffer1)));
98 t4.reset();
99 t4.fromTlshStr(minSizeBuffer1);
100 printf("hash4 = %s\n", t4.getHash(minSizeBuffer2, sizeof(minSizeBuffer2)));
101 assert(strcmp(minSizeBuffer1, minSizeBuffer2) == 0);
102 printf("difference (same strings) = %d\n", t3.totalDiff(&t3) );
103 printf("difference (with len) = %d\n", t3.totalDiff(&t4) );
104 printf("difference (without len) = %d\n", t3.totalDiff(&t4, false) );
64105 }
5757
5858 fclose(fd);
5959
60 if (sizefile < 512)
60 if (sizefile < MIN_DATA_LENGTH)
6161 return(WARNING_FILE_TOO_SMALL);
6262
6363 ///////////////////////////////////////
145145 int n_file = 0;
146146 while (dit != NULL) {
147147 char tmp_fname[2000];
148 strncpy(tmp_fname, dirname, sizeof(tmp_fname));
149 strncat(tmp_fname, "/", sizeof(tmp_fname));
150 strncat(tmp_fname, dit->d_name, sizeof(tmp_fname));
151 if (strlen(tmp_fname) < sizeof(tmp_fname) - 2) {
148 int len = snprintf(tmp_fname, sizeof(tmp_fname)-1, "%s/%s", dirname, dit->d_name);
149 if (len < sizeof(tmp_fname) - 2) {
152150 if (is_dir(tmp_fname) ) {
153151 if ((strcmp(dit->d_name, ".") == 0) || (strcmp(dit->d_name, "..") == 0)) {
154152 ;
179177 dit = readdir(dip);
180178 while (dit != NULL) {
181179 char tmp_fname[2000];
182 strncpy(tmp_fname, dirname, sizeof(tmp_fname));
183 strncat(tmp_fname, "/", sizeof(tmp_fname));
184 strncat(tmp_fname, dit->d_name, sizeof(tmp_fname));
180 int len = snprintf(tmp_fname, sizeof(tmp_fname)-1, "%s/%s", dirname, dit->d_name);
185181 // -2 for safety
186 if (strlen(tmp_fname) < sizeof(tmp_fname) - 2) {
182 if (len < sizeof(tmp_fname) - 2) {
187183 if (is_dir(tmp_fname) ) {
188184 if ((strcmp(dit->d_name, ".") == 0) || (strcmp(dit->d_name, "..") == 0)) {
189185 ;