Codebase list tlsh / dafea28
Add Tlsh_fromTlshStr() to python API and extend py_ext/test.py to test for this. Add description of expanded python API to README.md file. Scott4man 8 years ago
4 changed file(s) with 64 addition(s) and 10 deletion(s). Raw diff Collapse all Expand all
8989 import tlsh
9090 tlsh.hash(data)
9191 ```
92
9293
9394 Note that the data must contain at least 256 bytes to generate a hash value and that
9495 it must have a certain amount of randomness.
105106 with only a single instance of the pattern, then the difference will be increased
106107 if the file length is included. But by using the `diffxlen` function, the file
107108 length will be removed from consideration.
109
110 Note that the Python API has been extended to mirror the C++ API. See
111 py_ext/tlshmodule.cpp and the py_ext/test.py script to see the full API set.
108112
109113 # Design Choices
110114
77 h2.diff(h1) 427
88 h1.diff(hex2) 427
99 h2.diff(hex1) 427
10 tlsh.Tlsh.fromTlshStr E951784702042376169012B1BA5A76EAF36092FC3311A595B4856235278F9F973763EF
11 h3.diff(h2) 0
3333 print('h2.diff(h1)', h2.diff(h1))
3434 print('h1.diff(hex2)', h1.diff(hex2))
3535 print('h2.diff(hex1)', h2.diff(hex1))
36
37 h3 = tlsh.Tlsh()
38 h3.fromTlshStr(hex2)
39 print('tlsh.Tlsh.fromTlshStr', hex2)
40 print('h3.diff(h2)', h3.diff(h2))
41
42
1111 #else
1212 # define BYTES_VALUE_CHAR "s"
1313 #endif
14
15 #define MIN_TLSH_LEN 512
1614
1715 static char tlsh_doc[] =
1816 "TLSH C version - similarity matching and searching";
5048 }
5149
5250 Tlsh tlsh1, tlsh2;
53 tlsh1.fromTlshStr(hash1);
54 tlsh2.fromTlshStr(hash2);
51 if (tlsh1.fromTlshStr(hash1) != 0) {
52 return PyErr_Format(PyExc_ValueError, "argument %s is not a TLSH hex string", hash1);
53 }
54 if (tlsh2.fromTlshStr(hash2) != 0) {
55 return PyErr_Format(PyExc_ValueError, "argument %s is not a TLSH hex string", hash2);
56 }
5557
5658 int score = tlsh1.totalDiff(&tlsh2);
5759
6668 }
6769
6870 Tlsh tlsh1, tlsh2;
69 tlsh1.fromTlshStr(hash1);
70 tlsh2.fromTlshStr(hash2);
71 if (tlsh1.fromTlshStr(hash1) != 0) {
72 return PyErr_Format(PyExc_ValueError, "argument %s is not a TLSH hex string", hash1);
73 }
74 if (tlsh2.fromTlshStr(hash2) != 0) {
75 return PyErr_Format(PyExc_ValueError, "argument %s is not a TLSH hex string", hash2);
76 }
7177
7278 int score = tlsh1.totalDiff(&tlsh2, false);
7379
9096 Tlsh tlsh;
9197 } tlsh_TlshObject;
9298
99 static PyObject * Tlsh_fromTlshStr(tlsh_TlshObject *, PyObject *);
93100 static PyObject * Tlsh_update(tlsh_TlshObject *, PyObject *);
94101 static PyObject * Tlsh_final(tlsh_TlshObject *);
95102 static PyObject * Tlsh_hexdigest(tlsh_TlshObject *);
96103 static PyObject * Tlsh_diff(tlsh_TlshObject *, PyObject *);
97104
98105 static PyMethodDef Tlsh_methods[] = {
106 {"fromTlshStr", (PyCFunction) Tlsh_fromTlshStr, METH_VARARGS,
107 "Create a TLSH instance from a hex string."
108 },
99109 {"update", (PyCFunction) Tlsh_update, METH_VARARGS,
100110 "Update the TLSH with the given string."
101111 },
156166 };
157167
158168 static PyObject *
169 Tlsh_fromTlshStr(tlsh_TlshObject *self, PyObject *args)
170 {
171 char *str;
172 Py_ssize_t len;
173
174 PyObject *arg;
175
176 if (PyTuple_Size(args) != 1)
177 return PyErr_Format(PyExc_TypeError, "function takes exactly 1 argument (%lu given)", PyTuple_Size(args));
178
179 arg = PyTuple_GetItem(args, 0);
180 if (PyBytes_AsStringAndSize(arg, &str, &len) == -1) {
181 PyErr_SetString(PyExc_ValueError, "argument is not a TLSH hex string");
182 return NULL;
183 }
184
185 if (len != TLSH_STRING_LEN) {
186 PyErr_SetString(PyExc_ValueError, "argument length incorrect: not a TLSH hex string");
187 return NULL;
188 }
189
190 if (self->tlsh.fromTlshStr(str) != 0) {
191 PyErr_SetString(PyExc_ValueError, "argument value incorrect: not a TLSH hex string");
192 return NULL;
193 }
194 self->finalized = true;
195
196 Py_RETURN_NONE;
197 }
198
199 static PyObject *
159200 Tlsh_update(tlsh_TlshObject *self, PyObject *args)
160201 {
161202 const char *str;
168209 PyErr_SetString(PyExc_ValueError, "final() has already been called");
169210 return NULL;
170211 }
171 if (self->required_data < MIN_TLSH_LEN) {
172 self->required_data += len > MIN_TLSH_LEN ? MIN_TLSH_LEN : len;
212 if (self->required_data < MIN_DATA_LENGTH) {
213 self->required_data += len > MIN_DATA_LENGTH ? MIN_DATA_LENGTH : len;
173214 }
174215
175216 self->tlsh.update((unsigned char *) str, (unsigned int) len);
184225 PyErr_SetString(PyExc_ValueError, "final() has already been called");
185226 return NULL;
186227 }
187 if (self->required_data < MIN_TLSH_LEN) {
188 return PyErr_Format(PyExc_ValueError, "less than %u of input", MIN_TLSH_LEN);
228 if (self->required_data < MIN_DATA_LENGTH) {
229 return PyErr_Format(PyExc_ValueError, "less than %u of input", MIN_DATA_LENGTH);
189230 }
190231 self->finalized = true;
191232 self->tlsh.final();