Merge tag 'upstream/0.5.31'
Upstream version 0.5.31
Sascha Steinbiss
4 years ago
0 | 0.5.31 (24 September 2019) | |
1 | -------------------------- | |
2 | ||
3 | - various improvements related to 'HTTP Evader' | |
4 | ||
5 | - various fixes for issues found by oss-fuzz | |
6 | ||
7 | - adds optional LZMA decompression | |
8 | ||
0 | 9 | 0.5.30 (07 March 2019) |
1 | 10 | ---------------------- |
2 | 11 |
249 | 249 | AC_OUTPUT(Makefile \ |
250 | 250 | htp.pc \ |
251 | 251 | htp/Makefile \ |
252 | htp/lzma/Makefile \ | |
252 | 253 | test/Makefile \ |
253 | 254 | docs/Makefile |
254 | 255 | ) |
0 | ||
1 | SUBDIRS = lzma | |
0 | 2 | |
1 | 3 | h_sources = bstr.h bstr_builder.h htp.h htp_base64.h htp_config.h htp_connection_parser.h \ |
2 | 4 | htp_core.h htp_decompressors.h htp_hooks.h htp_list.h \ |
24 | 26 | |
25 | 27 | lib_LTLIBRARIES = libhtp.la |
26 | 28 | libhtp_la_SOURCES = |
27 | libhtp_la_LIBADD = libhtp-c.la | |
29 | libhtp_la_LIBADD = libhtp-c.la lzma/liblzma-c.la | |
28 | 30 | libhtp_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) |
218 | 218 | return bstr_util_cmp_mem_nocase(bstr_ptr(b), bstr_len(b), c, strlen(c)); |
219 | 219 | } |
220 | 220 | |
221 | int bstr_cmp_c_nocasenorzero(const bstr *b, const char *c) { | |
222 | return bstr_util_cmp_mem_nocasenorzero(bstr_ptr(b), bstr_len(b), c, strlen(c)); | |
223 | } | |
224 | ||
221 | 225 | int bstr_cmp_mem(const bstr *b, const void *data, size_t len) { |
222 | 226 | return bstr_util_cmp_mem(bstr_ptr(b), bstr_len(b), data, len); |
223 | 227 | } |
292 | 296 | |
293 | 297 | int bstr_index_of_c_nocase(const bstr *haystack, const char *needle) { |
294 | 298 | return bstr_index_of_mem_nocase(haystack, needle, strlen(needle)); |
299 | } | |
300 | ||
301 | int bstr_index_of_c_nocasenorzero(const bstr *haystack, const char *needle) { | |
302 | return bstr_util_mem_index_of_mem_nocasenorzero(bstr_ptr(haystack), bstr_len(haystack), needle, strlen(needle)); | |
295 | 303 | } |
296 | 304 | |
297 | 305 | int bstr_index_of_mem(const bstr *haystack, const void *_data2, size_t len2) { |
378 | 386 | } |
379 | 387 | |
380 | 388 | if ((p1 == len2) && (p2 == len1)) { |
389 | // They're identical. | |
390 | return 0; | |
391 | } else { | |
392 | // One string is shorter. | |
393 | if (p1 == len1) return -1; | |
394 | else return 1; | |
395 | } | |
396 | } | |
397 | ||
398 | int bstr_util_cmp_mem_nocasenorzero(const void *_data1, size_t len1, const void *_data2, size_t len2) { | |
399 | const unsigned char *data1 = (const unsigned char *) _data1; | |
400 | const unsigned char *data2 = (const unsigned char *) _data2; | |
401 | size_t p1 = 0, p2 = 0; | |
402 | ||
403 | while ((p1 < len1) && (p2 < len2)) { | |
404 | if (data1[p1] == 0) { | |
405 | p1++; | |
406 | continue; | |
407 | } | |
408 | if (tolower(data1[p1]) != tolower(data2[p2])) { | |
409 | // Difference. | |
410 | return (tolower(data1[p1]) < tolower(data2[p2])) ? -1 : 1; | |
411 | } | |
412 | ||
413 | p1++; | |
414 | p2++; | |
415 | } | |
416 | ||
417 | while((p1 < len1) && (data1[p1] == 0)) { | |
418 | p1++; | |
419 | } | |
420 | if ((p1 == len1) && (p2 == len2)) { | |
381 | 421 | // They're identical. |
382 | 422 | return 0; |
383 | 423 | } else { |
495 | 535 | return -1; |
496 | 536 | } |
497 | 537 | |
538 | int bstr_util_mem_index_of_mem_nocasenorzero(const void *_data1, size_t len1, const void *_data2, size_t len2) { | |
539 | const unsigned char *data1 = (unsigned char *) _data1; | |
540 | const unsigned char *data2 = (unsigned char *) _data2; | |
541 | size_t i, j; | |
542 | ||
543 | // If we ever want to optimize this function, the following link | |
544 | // might be useful: http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm | |
545 | ||
546 | for (i = 0; i < len1; i++) { | |
547 | size_t k = i; | |
548 | if (data1[i] == 0) { | |
549 | // skip leading zeroes to avoid quadratic complexity | |
550 | continue; | |
551 | } | |
552 | ||
553 | for (j = 0; ((j < len2) && (k < len1)); j++, k++) { | |
554 | if (data1[k] == 0) { | |
555 | j--; | |
556 | continue; | |
557 | } | |
558 | if (toupper(data1[k]) != toupper(data2[j])) break; | |
559 | } | |
560 | ||
561 | if (j == len2) { | |
562 | return i; | |
563 | } | |
564 | } | |
565 | ||
566 | return -1; | |
567 | } | |
568 | ||
498 | 569 | void bstr_util_mem_trim(unsigned char **data, size_t *len) { |
499 | 570 | if ((data == NULL)||(len == NULL)) return; |
500 | 571 |
313 | 313 | int bstr_cmp_c_nocase(const bstr *b, const char *cstr); |
314 | 314 | |
315 | 315 | /** |
316 | * Case-insensitive zero-skipping comparison of a bstring with a NUL-terminated string. | |
317 | * | |
318 | * @param[in] b | |
319 | * @param[in] cstr | |
320 | * @return Zero on string match, 1 if b is greater than cstr, and -1 if cstr is greater than b. | |
321 | */ | |
322 | int bstr_cmp_c_nocasenorzero(const bstr *b, const char *cstr); | |
323 | ||
324 | /** | |
316 | 325 | * Performs a case-sensitive comparison of a bstring with a memory region. |
317 | 326 | * |
318 | 327 | * @param[in] b |
343 | 352 | int bstr_cmp_nocase(const bstr *b1, const bstr *b2); |
344 | 353 | |
345 | 354 | /** |
355 | * Case-insensitive and zero-skipping comparison of two bstrings. | |
356 | * | |
357 | * @param[in] b1 | |
358 | * @param[in] b2 | |
359 | * @return Zero on string match, 1 if b1 is greater than b2, and -1 if b2 is | |
360 | * greater than b1. | |
361 | */ | |
362 | int bstr_cmp_nocasenorzero(const bstr *b1, const bstr *b2); | |
363 | ||
364 | /** | |
346 | 365 | * Create a new bstring by copying the provided bstring. |
347 | 366 | * |
348 | 367 | * @param[in] b |
443 | 462 | * @return Position of the match, or -1 if the needle could not be found. |
444 | 463 | */ |
445 | 464 | int bstr_index_of_c_nocase(const bstr *bhaystack, const char *cneedle); |
465 | ||
466 | /** | |
467 | * Find the needle in the haystack, with the needle being a NUL-terminated | |
468 | * string. Ignore case differences. Skip zeroes in haystack | |
469 | * | |
470 | * @param[in] bhaystack | |
471 | * @param[in] cneedle | |
472 | * @return Position of the match, or -1 if the needle could not be found. | |
473 | */ | |
474 | int bstr_index_of_c_nocasenorzero(const bstr *bhaystack, const char *cneedle); | |
446 | 475 | |
447 | 476 | /** |
448 | 477 | * Find the needle in the haystack, with the needle being a memory region. |
506 | 535 | * data2, and -1 if data2 is greater than data1. |
507 | 536 | */ |
508 | 537 | int bstr_util_cmp_mem_nocase(const void *data1, size_t len1, const void *data2, size_t len2); |
538 | ||
539 | /** | |
540 | * Case-insensitive zero-skipping comparison of two memory regions. | |
541 | * | |
542 | * @param[in] data1 | |
543 | * @param[in] len1 | |
544 | * @param[in] data2 | |
545 | * @param[in] len2 | |
546 | * @return Zero if the memory regions are identical, 1 if data1 is greater than | |
547 | * data2, and -1 if data2 is greater than data1. | |
548 | */ | |
549 | int bstr_util_cmp_mem_nocasenorzero(const void *data1, size_t len1, const void *data2, size_t len2); | |
509 | 550 | |
510 | 551 | /** |
511 | 552 | * Convert contents of a memory region to a positive integer. |
564 | 605 | int bstr_util_mem_index_of_mem_nocase(const void *data1, size_t len1, const void *data2, size_t len2); |
565 | 606 | |
566 | 607 | /** |
608 | * Searches the haystack memory block for the needle memory block. Case insensitive. Skips zero bytes in data1 (the haystack). | |
609 | * | |
610 | * @param data1 | |
611 | * @param len1 | |
612 | * @param data2 | |
613 | * @param len2 | |
614 | * @return Index of the first location of the needle on success, or -1 if the needle was not found. | |
615 | */ | |
616 | int bstr_util_mem_index_of_mem_nocasenorzero(const void *data1, size_t len1, const void *data2, size_t len2); | |
617 | ||
618 | /** | |
567 | 619 | * Removes whitespace from the beginning and the end of a memory region. The data |
568 | 620 | * itself is not modified; this function only adjusts the provided pointers. |
569 | 621 | * |
535 | 535 | |
536 | 536 | /** Transaction index on the connection. */ |
537 | 537 | size_t index; |
538 | ||
539 | /** Total repetitions for headers in request. */ | |
540 | uint16_t req_header_repetitions; | |
541 | ||
542 | /** Total repetitions for headers in response. */ | |
543 | uint16_t res_header_repetitions; | |
538 | 544 | }; |
539 | 545 | |
540 | 546 | /** |
157 | 157 | cfg->extract_request_files = 0; |
158 | 158 | cfg->extract_request_files_limit = -1; // Use the parser default. |
159 | 159 | cfg->response_decompression_layer_limit = 2; // 2 layers seem fairly common |
160 | cfg->lzma_memlimit = HTP_LZMA_MEMLIMIT; | |
161 | cfg->compression_bomb_limit = HTP_COMPRESSION_BOMB_LIMIT; | |
160 | 162 | |
161 | 163 | // Default settings for URL-encoded data. |
162 | 164 | |
506 | 508 | cfg->field_limit_hard = hard_limit; |
507 | 509 | } |
508 | 510 | |
511 | void htp_config_set_lzma_memlimit(htp_cfg_t *cfg, size_t memlimit) { | |
512 | if (cfg == NULL) return; | |
513 | cfg->lzma_memlimit = memlimit; | |
514 | } | |
515 | ||
516 | void htp_config_set_compression_bomb_limit(htp_cfg_t *cfg, size_t bomblimit) { | |
517 | if (cfg == NULL) return; | |
518 | if (bomblimit > INT32_MAX) { | |
519 | cfg->compression_bomb_limit = INT32_MAX; | |
520 | } else { | |
521 | cfg->compression_bomb_limit = bomblimit; | |
522 | } | |
523 | } | |
524 | ||
509 | 525 | void htp_config_set_log_level(htp_cfg_t *cfg, enum htp_log_level_t log_level) { |
510 | 526 | if (cfg == NULL) return; |
511 | 527 | cfg->log_level = log_level; |
426 | 426 | void htp_config_set_field_limits(htp_cfg_t *cfg, size_t soft_limit, size_t hard_limit); |
427 | 427 | |
428 | 428 | /** |
429 | * Configures the maximum memlimit LibHTP will pass to liblzma. | |
430 | * | |
431 | * @param[in] cfg | |
432 | * @param[in] memlimit | |
433 | */ | |
434 | void htp_config_set_lzma_memlimit(htp_cfg_t *cfg, size_t memlimit); | |
435 | ||
436 | /** | |
437 | * Configures the maximum compression bomb size LibHTP will decompress. | |
438 | * | |
439 | * @param[in] cfg | |
440 | * @param[in] bomblimit | |
441 | */ | |
442 | void htp_config_set_compression_bomb_limit(htp_cfg_t *cfg, size_t bomblimit); | |
443 | ||
444 | /** | |
429 | 445 | * Configures the desired log level. |
430 | 446 | * |
431 | 447 | * @param[in] cfg |
341 | 341 | |
342 | 342 | /** How many layers of compression we will decompress (0 => no limit). */ |
343 | 343 | int response_decompression_layer_limit; |
344 | ||
345 | /** Maximum memory used by the LZMA decompressor. */ | |
346 | size_t lzma_memlimit; | |
347 | ||
348 | /** max output size for a compression bomb. */ | |
349 | int32_t compression_bomb_limit; | |
344 | 350 | }; |
345 | 351 | |
346 | 352 | #ifdef __cplusplus |
154 | 154 | HTP_COMPRESSION_GZIP = 2, |
155 | 155 | |
156 | 156 | /** Deflate compression. */ |
157 | HTP_COMPRESSION_DEFLATE = 3 | |
157 | HTP_COMPRESSION_DEFLATE = 3, | |
158 | ||
159 | /** LZMA compression. */ | |
160 | HTP_COMPRESSION_LZMA = 4 | |
158 | 161 | }; |
159 | 162 | |
160 | 163 | /** |
229 | 232 | #define HTP_REQUEST_INVALID_C_L 0x200000000ULL |
230 | 233 | #define HTP_AUTH_INVALID 0x400000000ULL |
231 | 234 | |
235 | #define HTP_MAX_HEADERS_REPETITIONS 64 | |
236 | ||
232 | 237 | #define HTP_HOST_INVALID ( HTP_HOSTU_INVALID | HTP_HOSTH_INVALID ) |
233 | 238 | |
234 | 239 | // Logging-related constants. |
39 | 39 | |
40 | 40 | #include "htp_private.h" |
41 | 41 | |
42 | ||
43 | static void *SzAlloc(ISzAllocPtr p, size_t size) { return malloc(size); } | |
44 | static void SzFree(ISzAllocPtr p, void *address) { free(address); } | |
45 | const ISzAlloc lzma_Alloc = { SzAlloc, SzFree }; | |
46 | ||
47 | ||
42 | 48 | /** |
43 | 49 | * @brief See if the header has extensions |
44 | 50 | * @return number of bytes to skip |
153 | 159 | } |
154 | 160 | |
155 | 161 | /** |
162 | * Ends decompressor. | |
163 | * | |
164 | * @param[in] drec | |
165 | */ | |
166 | static void htp_gzip_decompressor_end(htp_decompressor_gzip_t *drec) { | |
167 | if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) { | |
168 | LzmaDec_Free(&drec->state, &lzma_Alloc); | |
169 | drec->zlib_initialized = 0; | |
170 | } else if (drec->zlib_initialized) { | |
171 | inflateEnd(&drec->stream); | |
172 | drec->zlib_initialized = 0; | |
173 | } | |
174 | } | |
175 | ||
176 | /** | |
156 | 177 | * Decompress a chunk of gzip-compressed data. |
157 | 178 | * If we have more than one decompressor, call this function recursively. |
158 | 179 | * |
163 | 184 | static htp_status_t htp_gzip_decompressor_decompress(htp_decompressor_gzip_t *drec, htp_tx_data_t *d) { |
164 | 185 | size_t consumed = 0; |
165 | 186 | int rc = 0; |
187 | htp_status_t callback_rc; | |
166 | 188 | |
167 | 189 | // Pass-through the NULL chunk, which indicates the end of the stream. |
168 | 190 | |
173 | 195 | d2.len = d->len; |
174 | 196 | d2.is_last = d->is_last; |
175 | 197 | |
176 | htp_status_t callback_rc = drec->super.callback(&d2); | |
198 | callback_rc = drec->super.callback(&d2); | |
177 | 199 | if (callback_rc != HTP_OK) { |
178 | 200 | return HTP_ERROR; |
179 | 201 | } |
185 | 207 | // Prepare data for callback. |
186 | 208 | htp_tx_data_t dout; |
187 | 209 | dout.tx = d->tx; |
188 | dout.data = NULL; | |
189 | dout.len = 0; | |
210 | // This is the last call, so output the data decompressed so far | |
211 | dout.len = GZIP_BUF_SIZE - drec->stream.avail_out; | |
212 | if (dout.len > 0) { | |
213 | dout.data = drec->buffer; | |
214 | } else { | |
215 | dout.data = NULL; | |
216 | } | |
190 | 217 | dout.is_last = d->is_last; |
191 | 218 | if (drec->super.next != NULL && drec->zlib_initialized) { |
192 | 219 | return htp_gzip_decompressor_decompress((htp_decompressor_gzip_t *)drec->super.next, &dout); |
193 | 220 | } else { |
194 | 221 | // Send decompressed data to the callback. |
195 | htp_status_t callback_rc = drec->super.callback(&dout); | |
222 | callback_rc = drec->super.callback(&dout); | |
196 | 223 | if (callback_rc != HTP_OK) { |
197 | inflateEnd(&drec->stream); | |
198 | drec->zlib_initialized = 0; | |
199 | ||
224 | htp_gzip_decompressor_end(drec); | |
200 | 225 | return callback_rc; |
201 | 226 | } |
202 | 227 | } |
225 | 250 | d2.len = GZIP_BUF_SIZE; |
226 | 251 | d2.is_last = d->is_last; |
227 | 252 | |
228 | //if (drec->super.next != NULL) { | |
229 | 253 | if (drec->super.next != NULL && drec->zlib_initialized) { |
230 | htp_tx_data_t d3; | |
231 | d3.tx = d->tx; | |
232 | d3.data = drec->buffer; | |
233 | d3.len = GZIP_BUF_SIZE; | |
234 | d3.is_last = d->is_last; | |
235 | return htp_gzip_decompressor_decompress((htp_decompressor_gzip_t *)drec->super.next, &d3); | |
254 | callback_rc = htp_gzip_decompressor_decompress((htp_decompressor_gzip_t *)drec->super.next, &d2); | |
236 | 255 | } else { |
237 | 256 | // Send decompressed data to callback. |
238 | htp_status_t callback_rc = drec->super.callback(&d2); | |
239 | if (callback_rc != HTP_OK) { | |
240 | inflateEnd(&drec->stream); | |
241 | drec->zlib_initialized = 0; | |
242 | ||
243 | return callback_rc; | |
244 | } | |
257 | callback_rc = drec->super.callback(&d2); | |
258 | } | |
259 | if (callback_rc != HTP_OK) { | |
260 | htp_gzip_decompressor_end(drec); | |
261 | return callback_rc; | |
245 | 262 | } |
246 | 263 | |
247 | 264 | drec->stream.next_out = drec->buffer; |
248 | 265 | drec->stream.avail_out = GZIP_BUF_SIZE; |
249 | 266 | } |
250 | 267 | |
251 | if (drec->zlib_initialized) { | |
268 | if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) { | |
269 | if (drec->header_len < LZMA_PROPS_SIZE + 8) { | |
270 | consumed = LZMA_PROPS_SIZE + 8 - drec->header_len; | |
271 | if (consumed > drec->stream.avail_in) { | |
272 | consumed = drec->stream.avail_in; | |
273 | } | |
274 | memcpy(drec->header + drec->header_len, drec->stream.next_in, consumed); | |
275 | drec->stream.next_in = (unsigned char *) (d->data + consumed); | |
276 | drec->stream.avail_in = d->len - consumed; | |
277 | drec->header_len += consumed; | |
278 | } | |
279 | if (drec->header_len == LZMA_PROPS_SIZE + 8) { | |
280 | rc = LzmaDec_Allocate(&drec->state, drec->header, LZMA_PROPS_SIZE, &lzma_Alloc); | |
281 | if (rc != SZ_OK) | |
282 | return rc; | |
283 | LzmaDec_Init(&drec->state); | |
284 | // Hacky: bump header_len to move on to the decoding step; a failed allocation returns above and is retried on the next call | |
285 | drec->header_len++; | |
286 | } | |
287 | if (drec->header_len > LZMA_PROPS_SIZE + 8) { | |
288 | size_t inprocessed = drec->stream.avail_in; | |
289 | size_t outprocessed = drec->stream.avail_out; | |
290 | ELzmaStatus status; | |
291 | rc = LzmaDec_DecodeToBuf(&drec->state, drec->stream.next_out, &outprocessed, | |
292 | drec->stream.next_in, &inprocessed, LZMA_FINISH_ANY, &status, d->tx->cfg->lzma_memlimit); | |
293 | drec->stream.avail_in -= inprocessed; | |
294 | drec->stream.next_in += inprocessed; | |
295 | drec->stream.avail_out -= outprocessed; | |
296 | drec->stream.next_out += outprocessed; | |
297 | switch (rc) { | |
298 | case SZ_OK: | |
299 | rc = Z_OK; | |
300 | if (status == LZMA_STATUS_FINISHED_WITH_MARK) { | |
301 | rc = Z_STREAM_END; | |
302 | } | |
303 | break; | |
304 | case SZ_ERROR_MEM: | |
305 | htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "LZMA decompressor: memory limit reached"); | |
306 | // fall through | |
307 | default: | |
308 | rc = Z_DATA_ERROR; | |
309 | } | |
310 | } | |
311 | } else if (drec->zlib_initialized) { | |
252 | 312 | rc = inflate(&drec->stream, Z_NO_FLUSH); |
313 | } else { | |
314 | // no initialization means previous error on stream | |
315 | return HTP_ERROR; | |
316 | } | |
317 | if (GZIP_BUF_SIZE > drec->stream.avail_out) { | |
318 | if (rc == Z_DATA_ERROR) { | |
319 | // There is data even if there is an error | |
320 | // So use this data and log a warning | |
321 | htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "GZip decompressor: inflate failed with %d", rc); | |
322 | rc = Z_STREAM_END; | |
323 | } | |
253 | 324 | } |
254 | 325 | if (rc == Z_STREAM_END) { |
255 | 326 | // How many bytes do we have? |
265 | 336 | d2.is_last = d->is_last; |
266 | 337 | |
267 | 338 | if (drec->super.next != NULL && drec->zlib_initialized) { |
268 | htp_tx_data_t d3; | |
269 | d3.tx = d->tx; | |
270 | d3.data = drec->buffer; | |
271 | d3.len = len; | |
272 | d3.is_last = d->is_last; | |
273 | return htp_gzip_decompressor_decompress((htp_decompressor_gzip_t *)drec->super.next, &d3); | |
274 | ||
339 | callback_rc = htp_gzip_decompressor_decompress((htp_decompressor_gzip_t *)drec->super.next, &d2); | |
275 | 340 | } else { |
276 | 341 | // Send decompressed data to the callback. |
277 | htp_status_t callback_rc = drec->super.callback(&d2); | |
278 | if (callback_rc != HTP_OK) { | |
279 | inflateEnd(&drec->stream); | |
280 | drec->zlib_initialized = 0; | |
281 | ||
282 | return callback_rc; | |
283 | } | |
284 | } | |
342 | callback_rc = drec->super.callback(&d2); | |
343 | } | |
344 | if (callback_rc != HTP_OK) { | |
345 | htp_gzip_decompressor_end(drec); | |
346 | return callback_rc; | |
347 | } | |
348 | drec->stream.avail_out = GZIP_BUF_SIZE; | |
349 | drec->stream.next_out = drec->buffer; | |
285 | 350 | // TODO Handle trailer. |
286 | 351 | |
287 | 352 | return HTP_OK; |
288 | 353 | } |
289 | 354 | else if (rc != Z_OK) { |
290 | 355 | htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "GZip decompressor: inflate failed with %d", rc); |
291 | inflateEnd(&drec->stream); | |
356 | if (drec->zlib_initialized == HTP_COMPRESSION_LZMA) { | |
357 | LzmaDec_Free(&drec->state, &lzma_Alloc); | |
358 | // so as to clean zlib resources after restart | |
359 | drec->zlib_initialized = HTP_COMPRESSION_NONE; | |
360 | } else { | |
361 | inflateEnd(&drec->stream); | |
362 | } | |
292 | 363 | |
293 | 364 | // see if we want to restart the decompressor |
294 | 365 | if (htp_gzip_decompressor_restart(drec, |
310 | 381 | d2.len = d->len; |
311 | 382 | d2.is_last = d->is_last; |
312 | 383 | |
313 | htp_status_t callback_rc = drec->super.callback(&d2); | |
384 | callback_rc = drec->super.callback(&d2); | |
314 | 385 | if (callback_rc != HTP_OK) { |
315 | 386 | return HTP_ERROR; |
316 | 387 | } |
388 | ||
389 | drec->stream.avail_out = GZIP_BUF_SIZE; | |
390 | drec->stream.next_out = drec->buffer; | |
317 | 391 | |
318 | 392 | /* successfully passed through, lets continue doing that */ |
319 | 393 | drec->passthrough = 1; |
332 | 406 | static void htp_gzip_decompressor_destroy(htp_decompressor_gzip_t *drec) { |
333 | 407 | if (drec == NULL) return; |
334 | 408 | |
335 | if (drec->zlib_initialized) { | |
336 | inflateEnd(&drec->stream); | |
337 | drec->zlib_initialized = 0; | |
338 | } | |
409 | htp_gzip_decompressor_end(drec); | |
339 | 410 | |
340 | 411 | free(drec->buffer); |
341 | 412 | free(drec); |
365 | 436 | // Initialize zlib. |
366 | 437 | int rc; |
367 | 438 | |
368 | if (format == HTP_COMPRESSION_DEFLATE) { | |
369 | // Negative values activate raw processing, | |
370 | // which is what we need for deflate. | |
371 | rc = inflateInit2(&drec->stream, -15); | |
372 | } else { | |
373 | // Increased windows size activates gzip header processing. | |
374 | rc = inflateInit2(&drec->stream, 15 + 32); | |
439 | switch (format) { | |
440 | case HTP_COMPRESSION_LZMA: | |
441 | if (connp->cfg->lzma_memlimit > 0) { | |
442 | LzmaDec_Construct(&drec->state); | |
443 | } else { | |
444 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "LZMA decompression disabled"); | |
445 | drec->passthrough = 1; | |
446 | } | |
447 | rc = Z_OK; | |
448 | break; | |
449 | case HTP_COMPRESSION_DEFLATE: | |
450 | // Negative values activate raw processing, | |
451 | // which is what we need for deflate. | |
452 | rc = inflateInit2(&drec->stream, -15); | |
453 | break; | |
454 | case HTP_COMPRESSION_GZIP: | |
455 | // Increased windows size activates gzip header processing. | |
456 | rc = inflateInit2(&drec->stream, 15 + 32); | |
457 | break; | |
458 | default: | |
459 | // do nothing | |
460 | rc = Z_DATA_ERROR; | |
375 | 461 | } |
376 | 462 | |
377 | 463 | if (rc != Z_OK) { |
378 | 464 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "GZip decompressor: inflateInit2 failed with code %d", rc); |
379 | 465 | |
380 | inflateEnd(&drec->stream); | |
466 | if (format == HTP_COMPRESSION_DEFLATE || format == HTP_COMPRESSION_GZIP) { | |
467 | inflateEnd(&drec->stream); | |
468 | } | |
381 | 469 | free(drec->buffer); |
382 | 470 | free(drec); |
383 | 471 |
43 | 43 | #endif |
44 | 44 | |
45 | 45 | #include <zlib.h> |
46 | #include "lzma/LzmaDec.h" | |
46 | 47 | |
47 | 48 | typedef struct htp_decompressor_gzip_t htp_decompressor_gzip_t; |
48 | 49 | typedef struct htp_decompressor_t htp_decompressor_t; |
65 | 66 | int initialized; |
66 | 67 | #endif |
67 | 68 | int zlib_initialized; |
68 | uint8_t header[10]; | |
69 | uint8_t header_len; | |
70 | 69 | uint8_t restart; /**< deflate restarted to try rfc1950 instead of 1951 */ |
71 | 70 | uint8_t passthrough; /**< decompression failed, pass through raw data */ |
72 | 71 | z_stream stream; |
72 | uint8_t header[LZMA_PROPS_SIZE + 8]; | |
73 | uint8_t header_len; | |
74 | CLzmaDec state; | |
73 | 75 | unsigned char *buffer; |
74 | 76 | unsigned long crc; |
75 | 77 | }; |
41 | 41 | |
42 | 42 | // Array-backed list |
43 | 43 | |
44 | htp_list_t *htp_list_array_create(size_t size) { | |
45 | // It makes no sense to create a zero-size list. | |
46 | if (size == 0) return NULL; | |
47 | ||
48 | // Allocate the list structure. | |
49 | htp_list_array_t *l = calloc(1, sizeof (htp_list_array_t)); | |
50 | if (l == NULL) return NULL; | |
51 | ||
44 | htp_status_t htp_list_array_init(htp_list_t *l, size_t size) { | |
52 | 45 | // Allocate the initial batch of elements. |
53 | 46 | l->elements = malloc(size * sizeof (void *)); |
54 | 47 | if (l->elements == NULL) { |
55 | free(l); | |
56 | return NULL; | |
48 | return HTP_ERROR; | |
57 | 49 | } |
58 | 50 | |
59 | 51 | // Initialize the structure. |
62 | 54 | l->current_size = 0; |
63 | 55 | l->max_size = size; |
64 | 56 | |
57 | return HTP_OK; | |
58 | } | |
59 | ||
60 | htp_list_t *htp_list_array_create(size_t size) { | |
61 | // It makes no sense to create a zero-size list. | |
62 | if (size == 0) return NULL; | |
63 | ||
64 | // Allocate the list structure. | |
65 | htp_list_array_t *l = calloc(1, sizeof (htp_list_array_t)); | |
66 | if (l == NULL) return NULL; | |
67 | ||
68 | if (htp_list_array_init(l, size) == HTP_ERROR) { | |
69 | free(l); | |
70 | return NULL; | |
71 | } | |
72 | ||
65 | 73 | return (htp_list_t *) l; |
66 | 74 | } |
67 | 75 | |
79 | 87 | |
80 | 88 | free(l->elements); |
81 | 89 | free(l); |
90 | } | |
91 | ||
92 | void htp_list_array_release(htp_list_array_t *l) { | |
93 | if (l == NULL) return; | |
94 | ||
95 | free(l->elements); | |
82 | 96 | } |
83 | 97 | |
84 | 98 | void *htp_list_array_get(const htp_list_array_t *l, size_t idx) { |
47 | 47 | #define htp_list_t htp_list_array_t |
48 | 48 | #define htp_list_add htp_list_array_push |
49 | 49 | #define htp_list_create htp_list_array_create |
50 | #define htp_list_init htp_list_array_init | |
50 | 51 | #define htp_list_clear htp_list_array_clear |
51 | 52 | #define htp_list_destroy htp_list_array_destroy |
53 | #define htp_list_release htp_list_array_release | |
52 | 54 | #define htp_list_get htp_list_array_get |
53 | 55 | #define htp_list_pop htp_list_array_pop |
54 | 56 | #define htp_list_push htp_list_array_push |
75 | 77 | htp_list_array_t *htp_list_array_create(size_t size); |
76 | 78 | |
77 | 79 | /** |
80 | * Initialize an array-backed list. | |
81 | * | |
82 | * @param[in] l | |
83 | * @param[in] size | |
84 | * @return HTP_OK or HTP_ERROR if allocation failed | |
85 | */ | |
86 | htp_status_t htp_list_array_init(htp_list_array_t *l, size_t size); | |
87 | ||
88 | /** | |
78 | 89 | * Remove all elements from the list. It is the responsibility of the caller |
79 | 90 | * to iterate over list elements and deallocate them if necessary, prior to |
80 | 91 | * invoking this function. |
92 | 103 | void htp_list_array_destroy(htp_list_array_t *l); |
93 | 104 | |
94 | 105 | /** |
106 | * Free the element storage held by this list, but not the list structure itself. | |
107 | * This function assumes the elements held by the list | |
108 | * were freed beforehand. | |
109 | * | |
110 | * @param[in] l | |
111 | */ | |
112 | void htp_list_array_release(htp_list_array_t *l); | |
113 | ||
114 | /** | |
95 | 115 | * Find the element at the given index. |
96 | 116 | * |
97 | 117 | * @param[in] l |
58 | 58 | #include <unistd.h> |
59 | 59 | #include <sys/types.h> |
60 | 60 | #include <sys/stat.h> |
61 | ||
61 | #include <stdint.h> | |
62 | ||
63 | #include "htp_config_auto_gen.h" | |
62 | 64 | #include "htp.h" |
63 | 65 | #include "htp_config_private.h" |
64 | 66 | #include "htp_connection_parser_private.h" |
74 | 76 | #ifndef LF |
75 | 77 | #define LF '\n' |
76 | 78 | #endif |
79 | ||
80 | // 1048576 is 1 Mbyte | |
81 | #define HTP_LZMA_MEMLIMIT 1048576 | |
82 | //deflate max ratio is about 1000 | |
83 | #define HTP_COMPRESSION_BOMB_RATIO 2048 | |
84 | #define HTP_COMPRESSION_BOMB_LIMIT 1048576 | |
77 | 85 | |
78 | 86 | #define HTP_FIELD_LIMIT_HARD 18000 |
79 | 87 | #define HTP_FIELD_LIMIT_SOFT 9000 |
169 | 177 | void htp_utf8_decode_path_inplace(htp_cfg_t *cfg, htp_tx_t *tx, bstr *path); |
170 | 178 | void htp_utf8_validate_path(htp_tx_t *tx, bstr *path); |
171 | 179 | |
172 | int64_t htp_parse_content_length(bstr *b); | |
180 | int64_t htp_parse_content_length(bstr *b, htp_connp_t *connp); | |
173 | 181 | int64_t htp_parse_chunked_length(unsigned char *data, size_t len); |
174 | 182 | int64_t htp_parse_positive_integer_whitespace(unsigned char *data, size_t len, int base); |
175 | 183 | int htp_parse_status(bstr *status); |
328 | 328 | unsigned char *data; |
329 | 329 | size_t len; |
330 | 330 | if (htp_connp_req_consolidate_data(connp, &data, &len) != HTP_OK) { |
331 | fprintf(stderr, "htp_connp_req_consolidate_data fail"); | |
332 | 331 | return HTP_ERROR; |
333 | 332 | } |
334 | 333 | #ifdef HTP_DEBUG |
357 | 356 | #ifdef HTP_DEBUG |
358 | 357 | fprint_raw_data(stderr, "htp_connp_REQ_CONNECT_PROBE_DATA: tunnel contains plain text HTTP", data, len); |
359 | 358 | #endif |
360 | connp->in_state = htp_connp_REQ_FINALIZE; | |
359 | return htp_tx_state_request_complete(connp->in_tx); | |
361 | 360 | } else { |
362 | 361 | #ifdef HTP_DEBUG |
363 | 362 | fprint_raw_data(stderr, "htp_connp_REQ_CONNECT_PROBE_DATA: tunnel is not HTTP", data, len); |
614 | 613 | */ |
615 | 614 | htp_status_t htp_connp_REQ_HEADERS(htp_connp_t *connp) { |
616 | 615 | for (;;) { |
616 | if (connp->in_status == HTP_STREAM_CLOSED) { | |
617 | // Parse previous header, if any. | |
618 | if (connp->in_header != NULL) { | |
619 | if (connp->cfg->process_request_header(connp, bstr_ptr(connp->in_header), | |
620 | bstr_len(connp->in_header)) != HTP_OK) | |
621 | return HTP_ERROR; | |
622 | bstr_free(connp->in_header); | |
623 | connp->in_header = NULL; | |
624 | } | |
625 | ||
626 | htp_connp_req_clear_buffer(connp); | |
627 | ||
628 | connp->in_tx->request_progress = HTP_REQUEST_TRAILER; | |
629 | ||
630 | // We've seen all the request headers. | |
631 | return htp_tx_state_request_headers(connp->in_tx); | |
632 | } | |
617 | 633 | IN_COPY_BYTE_OR_RETURN(connp); |
618 | 634 | |
619 | 635 | // Have we reached the end of the line? |
714 | 730 | connp->in_state = htp_connp_REQ_HEADERS; |
715 | 731 | connp->in_tx->request_progress = HTP_REQUEST_HEADERS; |
716 | 732 | } else { |
733 | // Let's check if the protocol was simply missing | |
734 | int64_t pos = connp->in_current_read_offset; | |
735 | int afterspaces = 0; | |
736 | // Probe if data looks like a header line | |
737 | while (pos < connp->in_current_len) { | |
738 | if (connp->in_current_data[pos] == ':') { | |
739 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: missing protocol"); | |
740 | connp->in_tx->is_protocol_0_9 = 0; | |
741 | // Switch to request header parsing. | |
742 | connp->in_state = htp_connp_REQ_HEADERS; | |
743 | connp->in_tx->request_progress = HTP_REQUEST_HEADERS; | |
744 | return HTP_OK; | |
745 | } else if (htp_is_lws(connp->in_current_data[pos])) { | |
746 | // Allows spaces after header name | |
747 | afterspaces = 1; | |
748 | } else if (htp_is_space(connp->in_current_data[pos]) || afterspaces == 1) { | |
749 | break; | |
750 | } | |
751 | pos++; | |
752 | } | |
717 | 753 | // We're done with this request. |
718 | 754 | connp->in_state = htp_connp_REQ_FINALIZE; |
719 | 755 | } |
791 | 827 | } |
792 | 828 | |
793 | 829 | htp_status_t htp_connp_REQ_FINALIZE(htp_connp_t *connp) {
830 | size_t bytes_left = connp->in_current_len - connp->in_current_read_offset; | |
831 | ||
832 | if (bytes_left > 0) { | |
833 | // Unread bytes remain after this request (bytes_left > 0). | |
834 | // If they start with a recognized method, this is request pipelining; | |
835 | // otherwise we interpret them as body data of the current request. | |
836 | int64_t pos = connp->in_current_read_offset; | |
837 | int64_t mstart = 0; | |
838 | // Skip past leading whitespace; IIS allows this. | |
839 | while ((pos < connp->in_current_len) && htp_is_space(connp->in_current_data[pos])) | |
840 | pos++; | |
841 | if (pos < connp->in_current_len) { | |
842 | mstart = pos; | |
843 | // The candidate method starts at the first non-whitespace byte and | |
844 | // ends just before the next whitespace character (or at the end of the data). | |
845 | while ((pos < connp->in_current_len) && (!htp_is_space(connp->in_current_data[pos]))) | |
846 | pos++; | |
847 | ||
848 | int methodi = HTP_M_UNKNOWN; | |
849 | bstr *method = bstr_dup_mem(connp->in_current_data + mstart, pos - mstart); | |
850 | if (method) { | |
851 | methodi = htp_convert_method_to_number(method); | |
852 | bstr_free(method); | |
853 | } | |
854 | if (methodi == HTP_M_UNKNOWN) { | |
855 | // Not a known method (also the case when bstr_dup_mem failed): treat all bytes_left as body data | |
856 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected request body"); | |
857 | connp->in_tx->request_progress = HTP_REQUEST_BODY; | |
858 | connp->in_state = htp_connp_REQ_BODY_IDENTITY; | |
859 | connp->in_body_data_left = bytes_left; | |
860 | return HTP_OK; | |
861 | } | |
862 | } | |
863 | } | |
794 | 864 | return htp_tx_state_request_complete(connp->in_tx);
795 | 865 | }
796 | 866 |
69 | 69 | if (h_existing != NULL) { |
70 | 70 | // TODO Do we want to have a list of the headers that are |
71 | 71 | // allowed to be combined in this way? |
72 | ||
73 | // Add to the existing header. | |
74 | bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value)); | |
75 | if (new_value == NULL) { | |
76 | bstr_free(h->name); | |
77 | bstr_free(h->value); | |
78 | free(h); | |
79 | return HTP_ERROR; | |
80 | } | |
81 | ||
82 | h_existing->value = new_value; | |
83 | bstr_add_mem_noex(h_existing->value, ", ", 2); | |
84 | bstr_add_noex(h_existing->value, h->value); | |
72 | if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) { | |
73 | // This is the second occurrence for this header. | |
74 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header"); | |
75 | } else { | |
76 | // For simplicity reasons, we count the repetitions of all headers | |
77 | if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) { | |
78 | connp->in_tx->req_header_repetitions++; | |
79 | } else { | |
80 | bstr_free(h->name); | |
81 | bstr_free(h->value); | |
82 | free(h); | |
83 | return HTP_OK; | |
84 | } | |
85 | } | |
86 | // Keep track of repeated same-name headers. | |
87 | h_existing->flags |= HTP_FIELD_REPEATED; | |
88 | ||
89 | // Having multiple C-L headers is against the RFC but | |
90 | // servers may ignore the subsequent headers if the values are the same. | |
91 | if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) { | |
92 | // Don't use string comparison here because we want to | |
93 | // ignore small formatting differences. | |
94 | ||
95 | int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL); | |
96 | int64_t new_cl = htp_parse_content_length(h->value, NULL); | |
97 | // Ambiguous request C-L value. | |
98 | if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) { | |
99 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value"); | |
100 | } | |
101 | // Ignoring the new C-L header that has the same value as the previous ones. | |
102 | } else { | |
103 | // Add to the existing header. | |
104 | bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value)); | |
105 | if (new_value == NULL) { | |
106 | bstr_free(h->name); | |
107 | bstr_free(h->value); | |
108 | free(h); | |
109 | return HTP_ERROR; | |
110 | } | |
111 | ||
112 | h_existing->value = new_value; | |
113 | bstr_add_mem_noex(h_existing->value, ", ", 2); | |
114 | bstr_add_noex(h_existing->value, h->value); | |
115 | } | |
85 | 116 | |
86 | 117 | // The new header structure is no longer needed. |
87 | 118 | bstr_free(h->name); |
88 | 119 | bstr_free(h->value); |
89 | 120 | free(h); |
90 | ||
91 | // Keep track of repeated same-name headers. | |
92 | h_existing->flags |= HTP_FIELD_REPEATED; | |
93 | 121 | } else { |
94 | 122 | // Add as a new header. |
95 | 123 | if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) { |
314 | 342 | } |
315 | 343 | pos++; |
316 | 344 | } |
345 | // Too much performance overhead for fuzzing | |
346 | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION | |
317 | 347 | if (bad_delim) { |
318 | 348 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI"); |
319 | 349 | } |
350 | #endif | |
320 | 351 | |
321 | 352 | // Is there anything after the request method? |
322 | 353 | if (pos == len) { |
324 | 355 | |
325 | 356 | tx->is_protocol_0_9 = 1; |
326 | 357 | tx->request_protocol_number = HTP_PROTOCOL_0_9; |
358 | if (tx->request_method_number == HTP_M_UNKNOWN) | |
359 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only"); | |
327 | 360 | |
328 | 361 | return HTP_OK; |
329 | 362 | } |
346 | 379 | pos = start; |
347 | 380 | while ((pos < len) && (!htp_is_space(data[pos]))) pos++; |
348 | 381 | } |
382 | // Too much performance overhead for fuzzing | |
383 | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION | |
349 | 384 | if (bad_delim) { |
350 | 385 | // warn regardless if we've seen non-compliant chars |
351 | 386 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter"); |
352 | 387 | } |
388 | #endif | |
353 | 389 | |
354 | 390 | tx->request_uri = bstr_dup_mem(data + start, pos - start); |
355 | 391 | if (tx->request_uri == NULL) return HTP_ERROR; |
367 | 403 | |
368 | 404 | tx->is_protocol_0_9 = 1; |
369 | 405 | tx->request_protocol_number = HTP_PROTOCOL_0_9; |
406 | if (tx->request_method_number == HTP_M_UNKNOWN) | |
407 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol"); | |
370 | 408 | |
371 | 409 | return HTP_OK; |
372 | 410 | } |
376 | 414 | if (tx->request_protocol == NULL) return HTP_ERROR; |
377 | 415 | |
378 | 416 | tx->request_protocol_number = htp_parse_protocol(tx->request_protocol); |
417 | if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID) | |
418 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol"); | |
379 | 419 | |
380 | 420 | #ifdef HTP_DEBUG |
381 | 421 | fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol)); |
418 | 418 | connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY; |
419 | 419 | |
420 | 420 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, |
421 | "Response chunk encoding: Invalid chunk length: %d", | |
421 | "Response chunk encoding: Invalid chunk length: %"PRId64"", | |
422 | 422 | connp->out_chunked_length); |
423 | 423 | return HTP_OK; |
424 | 424 | } |
457 | 457 | bytes_to_consume = connp->out_current_len - connp->out_current_read_offset; |
458 | 458 | } |
459 | 459 | |
460 | if (connp->out_status == HTP_STREAM_CLOSED) { | |
461 | connp->out_state = htp_connp_RES_FINALIZE; | |
462 | // Sends close signal to decompressors | |
463 | htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, NULL, 0); | |
464 | return rc; | |
465 | } | |
460 | 466 | if (bytes_to_consume == 0) return HTP_DATA; |
461 | 467 | |
462 | 468 | // Consume the data. |
469 | 475 | connp->out_stream_offset += bytes_to_consume; |
470 | 476 | connp->out_body_data_left -= bytes_to_consume; |
471 | 477 | |
472 | // Have we seen the entire response body? | |
478 | // Have we seen the entire response body? | |
473 | 479 | if (connp->out_body_data_left == 0) { |
474 | 480 | connp->out_state = htp_connp_RES_FINALIZE; |
475 | return HTP_OK; | |
481 | // Tells decompressors to output partially decompressed data | |
482 | rc = htp_tx_res_process_body_data_ex(connp->out_tx, NULL, 0); | |
483 | return rc; | |
476 | 484 | } |
477 | 485 | |
478 | 486 | return HTP_DATA; |
549 | 557 | } |
550 | 558 | } |
551 | 559 | |
560 | htp_header_t *cl = htp_table_get_c(connp->out_tx->response_headers, "content-length"); | |
561 | htp_header_t *te = htp_table_get_c(connp->out_tx->response_headers, "transfer-encoding"); | |
562 | ||
552 | 563 | // Check for "101 Switching Protocol" response. |
553 | 564 | // If it's seen, it means that traffic after empty line following headers |
554 | 565 | // is no longer HTTP. We can treat it similarly to CONNECT. |
556 | 567 | // rather unlikely, so don't try to probe tunnel for nested HTTP, |
557 | 568 | // and switch to tunnel mode right away. |
558 | 569 | if (connp->out_tx->response_status_number == 101) { |
559 | connp->out_state = htp_connp_RES_FINALIZE; | |
560 | ||
561 | connp->in_status = HTP_STREAM_TUNNEL; | |
562 | connp->out_status = HTP_STREAM_TUNNEL; | |
563 | ||
564 | // we may have response headers | |
565 | htp_status_t rc = htp_tx_state_response_headers(connp->out_tx); | |
566 | return rc; | |
570 | if (te == NULL && cl == NULL) { | |
571 | connp->out_state = htp_connp_RES_FINALIZE; | |
572 | ||
573 | connp->in_status = HTP_STREAM_TUNNEL; | |
574 | connp->out_status = HTP_STREAM_TUNNEL; | |
575 | ||
576 | // we may have response headers | |
577 | htp_status_t rc = htp_tx_state_response_headers(connp->out_tx); | |
578 | return rc; | |
579 | } else { | |
580 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Switching Protocol with Content-Length"); | |
581 | } | |
567 | 582 | } |
568 | 583 | |
569 | 584 | // Check for an interim "100 Continue" response. Ignore it if found, and revert back to RES_LINE. |
570 | if (connp->out_tx->response_status_number == 100) { | |
585 | if (connp->out_tx->response_status_number == 100 && te == NULL && cl == NULL) { | |
571 | 586 | if (connp->out_tx->seen_100continue != 0) { |
572 | 587 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Already seen 100-Continue."); |
573 | 588 | return HTP_ERROR; |
597 | 612 | // request) is always terminated by the first empty line after the |
598 | 613 | // header fields, regardless of the entity-header fields present in the |
599 | 614 | // message. |
600 | if (((connp->out_tx->response_status_number >= 100) && (connp->out_tx->response_status_number <= 199)) | |
601 | || (connp->out_tx->response_status_number == 204) || (connp->out_tx->response_status_number == 304) | |
602 | || (connp->out_tx->request_method_number == HTP_M_HEAD)) { | |
603 | // There's no response body | |
615 | if (connp->out_tx->request_method_number == HTP_M_HEAD) { | |
616 | // There's no response body whatsoever | |
604 | 617 | connp->out_tx->response_transfer_coding = HTP_CODING_NO_BODY; |
605 | 618 | connp->out_state = htp_connp_RES_FINALIZE; |
606 | } else { | |
619 | } | |
620 | else if (((connp->out_tx->response_status_number >= 100) && (connp->out_tx->response_status_number <= 199)) | |
621 | || (connp->out_tx->response_status_number == 204) || (connp->out_tx->response_status_number == 304)) { | |
622 | // There should be no response body | |
623 | // but browsers interpret content sent by the server as such | |
624 | if (te == NULL && cl == NULL) { | |
625 | connp->out_tx->response_transfer_coding = HTP_CODING_NO_BODY; | |
626 | connp->out_state = htp_connp_RES_FINALIZE; | |
627 | } else { | |
628 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected Response body"); | |
629 | } | |
630 | } | |
631 | // Hack condition to check that we do not assume "no body" | |
632 | if (connp->out_state != htp_connp_RES_FINALIZE) { | |
607 | 633 | // We have a response body |
608 | ||
609 | 634 | htp_header_t *ct = htp_table_get_c(connp->out_tx->response_headers, "content-type"); |
610 | htp_header_t *cl = htp_table_get_c(connp->out_tx->response_headers, "content-length"); | |
611 | htp_header_t *te = htp_table_get_c(connp->out_tx->response_headers, "transfer-encoding"); | |
612 | ||
613 | 635 | if (ct != NULL) { |
614 | 636 | connp->out_tx->response_content_type = bstr_dup_lower(ct->value); |
615 | 637 | if (connp->out_tx->response_content_type == NULL) return HTP_ERROR; |
632 | 654 | // 2. If a Transfer-Encoding header field (section 14.40) is present and |
633 | 655 | // indicates that the "chunked" transfer coding has been applied, then |
634 | 656 | // the length is defined by the chunked encoding (section 3.6). |
635 | if ((te != NULL) && (bstr_index_of_c_nocase(te->value, "chunked") != -1)) { | |
657 | if ((te != NULL) && (bstr_index_of_c_nocasenorzero(te->value, "chunked") != -1)) { | |
636 | 658 | if (bstr_cmp_c_nocase(te->value, "chunked") != 0) { |
637 | 659 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, |
638 | 660 | "Transfer-encoding has abnormal chunked value"); |
668 | 690 | } |
669 | 691 | |
670 | 692 | // Get body length |
671 | connp->out_tx->response_content_length = htp_parse_content_length(cl->value); | |
693 | connp->out_tx->response_content_length = htp_parse_content_length(cl->value, connp); | |
672 | 694 | if (connp->out_tx->response_content_length < 0) { |
673 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Invalid C-L field in response: %d", | |
695 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Invalid C-L field in response: %"PRId64"", | |
674 | 696 | connp->out_tx->response_content_length); |
675 | 697 | return HTP_ERROR; |
676 | 698 | } else { |
726 | 748 | * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. |
727 | 749 | */ |
728 | 750 | htp_status_t htp_connp_RES_HEADERS(htp_connp_t *connp) { |
751 | int endwithcr; | |
752 | int lfcrending = 0; | |
753 | ||
729 | 754 | for (;;) { |
755 | if (connp->out_status == HTP_STREAM_CLOSED) { | |
756 | // Finalize sending raw trailer data. | |
757 | htp_status_t rc = htp_connp_res_receiver_finalize_clear(connp); | |
758 | if (rc != HTP_OK) return rc; | |
759 | ||
760 | // Run hook response_TRAILER. | |
761 | rc = htp_hook_run_all(connp->cfg->hook_response_trailer, connp->out_tx); | |
762 | if (rc != HTP_OK) return rc; | |
763 | ||
764 | connp->out_state = htp_connp_RES_FINALIZE; | |
765 | return HTP_OK; | |
766 | } | |
730 | 767 | OUT_COPY_BYTE_OR_RETURN(connp); |
731 | 768 | |
732 | 769 | // Have we reached the end of the line? |
733 | if (connp->out_next_byte == LF || connp->out_next_byte == CR) { | |
734 | ||
770 | if (connp->out_next_byte != LF && connp->out_next_byte != CR) { | |
771 | lfcrending = 0; | |
772 | } else { | |
773 | endwithcr = 0; | |
735 | 774 | if (connp->out_next_byte == CR) { |
736 | 775 | OUT_PEEK_NEXT(connp); |
737 | 776 | if (connp->out_next_byte == -1) { |
738 | 777 | return HTP_DATA_BUFFER; |
739 | 778 | } else if (connp->out_next_byte == LF) { |
740 | 779 | OUT_COPY_BYTE_OR_RETURN(connp); |
780 | if (lfcrending) { | |
781 | // Handling LFCRCRLFCRLF | |
782 | // These 6 characters mean only 2 end of lines | |
783 | OUT_PEEK_NEXT(connp); | |
784 | if (connp->out_next_byte == CR) { | |
785 | OUT_COPY_BYTE_OR_RETURN(connp); | |
786 | connp->out_current_consume_offset++; | |
787 | OUT_PEEK_NEXT(connp); | |
788 | if (connp->out_next_byte == LF) { | |
789 | OUT_COPY_BYTE_OR_RETURN(connp); | |
790 | connp->out_current_consume_offset++; | |
791 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, | |
792 | "Weird response end of lines mix"); | |
793 | } | |
794 | } | |
795 | } | |
796 | } else if (connp->out_next_byte == CR) { | |
797 | continue; | |
798 | } | |
799 | lfcrending = 0; | |
800 | endwithcr = 1; | |
801 | } else { | |
802 | // connp->out_next_byte == LF | |
803 | OUT_PEEK_NEXT(connp); | |
804 | lfcrending = 0; | |
805 | if (connp->out_next_byte == CR) { | |
806 | // handles LF-CR sequence as end of line | |
807 | OUT_COPY_BYTE_OR_RETURN(connp); | |
808 | lfcrending = 1; | |
741 | 809 | } |
742 | 810 | } |
743 | 811 | |
746 | 814 | |
747 | 815 | if (htp_connp_res_consolidate_data(connp, &data, &len) != HTP_OK) { |
748 | 816 | return HTP_ERROR; |
817 | } | |
818 | ||
819 | // CRCRLF is not an empty line | |
820 | if (endwithcr && len < 2) { | |
821 | continue; | |
749 | 822 | } |
750 | 823 | |
751 | 824 | #ifdef HTP_DEBUG |
829 | 902 | connp->out_header = bstr_dup_mem(data, len); |
830 | 903 | if (connp->out_header == NULL) return HTP_ERROR; |
831 | 904 | } else { |
832 | // Add to the existing header. | |
833 | bstr *new_out_header = bstr_add_mem(connp->out_header, data, len); | |
834 | if (new_out_header == NULL) return HTP_ERROR; | |
835 | connp->out_header = new_out_header; | |
905 | size_t colon_pos = 0; | |
906 | while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++; | |
907 | ||
908 | if (colon_pos < len && | |
909 | bstr_chr(connp->out_header, ':') >= 0 && | |
910 | connp->out_tx->response_protocol_number == HTP_PROTOCOL_1_1) { | |
911 | // Warn only once per transaction. | |
912 | if (!(connp->out_tx->flags & HTP_INVALID_FOLDING)) { | |
913 | connp->out_tx->flags |= HTP_INVALID_FOLDING; | |
914 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Invalid response field folding"); | |
915 | } | |
916 | if (connp->cfg->process_response_header(connp, bstr_ptr(connp->out_header), | |
917 | bstr_len(connp->out_header)) != HTP_OK) | |
918 | return HTP_ERROR; | |
919 | bstr_free(connp->out_header); | |
920 | connp->out_header = bstr_dup_mem(data+1, len-1); | |
921 | if (connp->out_header == NULL) | |
922 | return HTP_ERROR; | |
923 | } else { | |
924 | // Add to the existing header. | |
925 | bstr *new_out_header = bstr_add_mem(connp->out_header, data, len); | |
926 | if (new_out_header == NULL) | |
927 | return HTP_ERROR; | |
928 | connp->out_header = new_out_header; | |
929 | } | |
836 | 930 | } |
837 | 931 | } |
838 | 932 | |
860 | 954 | // Have we reached the end of the line? We treat stream closure as end of line in |
861 | 955 | // order to handle the case when the first line of the response is actually response body |
862 | 956 | // (and we wish it processed as such). |
957 | if (connp->out_next_byte == CR) { | |
958 | OUT_PEEK_NEXT(connp); | |
959 | if (connp->out_next_byte == -1) { | |
960 | return HTP_DATA_BUFFER; | |
961 | } else if (connp->out_next_byte == LF) { | |
962 | continue; | |
963 | } | |
964 | connp->out_next_byte = LF; | |
965 | } | |
863 | 966 | if ((connp->out_next_byte == LF)||(connp->out_status == HTP_STREAM_CLOSED)) { |
864 | 967 | unsigned char *data; |
865 | 968 | size_t len; |
874 | 977 | |
875 | 978 | // Is this a line that should be ignored? |
876 | 979 | if (htp_connp_is_line_ignorable(connp, data, len)) { |
980 | if (connp->out_status == HTP_STREAM_CLOSED) { | |
981 | connp->out_state = htp_connp_RES_FINALIZE; | |
982 | } | |
877 | 983 | // We have an empty/whitespace line, which we'll note, ignore and move on |
878 | 984 | connp->out_tx->response_ignored_lines++; |
879 | 985 | |
918 | 1024 | if (htp_treat_response_line_as_body(data, len)) { |
919 | 1025 | connp->out_tx->response_content_encoding_processing = HTP_COMPRESSION_NONE; |
920 | 1026 | |
1027 | connp->out_current_consume_offset = connp->out_current_read_offset; | |
921 | 1028 | htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, data, len + chomp_result); |
922 | 1029 | if (rc != HTP_OK) return rc; |
923 | 1030 | |
924 | 1031 | // Continue to process response body. Because we don't have |
925 | 1032 | // any headers to parse, we assume the body continues until |
926 | 1033 | // the end of the stream. |
927 | connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY; | |
928 | connp->out_tx->response_progress = HTP_RESPONSE_BODY; | |
929 | connp->out_state = htp_connp_RES_BODY_IDENTITY_STREAM_CLOSE; | |
930 | connp->out_body_data_left = -1; | |
931 | ||
932 | // Clean response line allocations when processed as body | |
933 | bstr_free(connp->out_tx->response_line); | |
934 | connp->out_tx->response_line = NULL; | |
935 | ||
936 | bstr_free(connp->out_tx->response_protocol); | |
937 | connp->out_tx->response_protocol = NULL; | |
938 | ||
939 | bstr_free(connp->out_tx->response_status); | |
940 | connp->out_tx->response_status = NULL; | |
941 | ||
942 | bstr_free(connp->out_tx->response_message); | |
943 | connp->out_tx->response_message = NULL; | |
1034 | ||
1035 | // Have we seen the entire response body? | |
1036 | if (connp->out_current_len <= connp->out_current_read_offset) { | |
1037 | connp->out_tx->response_transfer_coding = HTP_CODING_IDENTITY; | |
1038 | connp->out_tx->response_progress = HTP_RESPONSE_BODY; | |
1039 | connp->out_body_data_left = -1; | |
1040 | connp->out_state = htp_connp_RES_FINALIZE; | |
1041 | } | |
944 | 1042 | |
945 | 1043 | return HTP_OK; |
946 | 1044 | } |
971 | 1069 | } |
972 | 1070 | |
973 | 1071 | htp_status_t htp_connp_RES_FINALIZE(htp_connp_t *connp) {
1072 | int64_t bytes_left = connp->out_current_len - connp->out_current_read_offset; // 64-bit: a plain int would truncate large unread counts | |
1073 | unsigned char * data = connp->out_current_data + connp->out_current_read_offset; | |
1074 | ||
1075 | if (bytes_left > 0 && | |
1076 | htp_treat_response_line_as_body(data, (size_t) bytes_left)) { | |
1077 | // Unread bytes remain and htp_treat_response_line_as_body() accepts them: consume them all as body data | |
1078 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Unexpected response body"); | |
1079 | connp->out_current_read_offset += bytes_left; | |
1080 | connp->out_current_consume_offset += bytes_left; | |
1081 | connp->out_stream_offset += bytes_left; | |
1082 | connp->out_body_data_left -= bytes_left; | |
1083 | htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, data, (size_t) bytes_left); | |
1084 | return rc; | |
1085 | } | |
1086 | ||
974 | 1087 | return htp_tx_state_response_complete_ex(connp->out_tx, 0 /* not hybrid mode */);
975 | 1088 | }
976 | 1089 |
170 | 170 | |
171 | 171 | name_end = colon_pos; |
172 | 172 | |
173 | // Ignore LWS after field-name. | |
173 | // Ignore unprintable after field-name. | |
174 | 174 | prev = name_end; |
175 | while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) { | |
175 | while ((prev > name_start) && (data[prev - 1] <= 0x20)) { | |
176 | 176 | prev--; |
177 | 177 | name_end--; |
178 | 178 | |
213 | 213 | } |
214 | 214 | |
215 | 215 | i++; |
216 | } | |
217 | for (i = value_start; i < value_end; i++) { | |
218 | if (data[i] == 0) { | |
219 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response header value contains null."); | |
220 | break; | |
221 | } | |
216 | 222 | } |
217 | 223 | |
218 | 224 | // Now extract the name and the value. |
255 | 261 | htp_header_t *h_existing = htp_table_get(connp->out_tx->response_headers, h->name); |
256 | 262 | if (h_existing != NULL) { |
257 | 263 | // Keep track of repeated same-name headers. |
264 | if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) { | |
265 | // This is the second occurrence for this header. | |
266 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header"); | |
267 | } else { | |
268 | // For simplicity reasons, we count the repetitions of all headers | |
269 | if (connp->out_tx->res_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) { | |
270 | connp->out_tx->res_header_repetitions++; | |
271 | } else { | |
272 | bstr_free(h->name); | |
273 | bstr_free(h->value); | |
274 | free(h); | |
275 | return HTP_OK; | |
276 | } | |
277 | } | |
258 | 278 | h_existing->flags |= HTP_FIELD_REPEATED; |
259 | ||
279 | ||
260 | 280 | // Having multiple C-L headers is against the RFC but many |
261 | 281 | // browsers ignore the subsequent headers if the values are the same. |
262 | 282 | if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) { |
265 | 285 | |
266 | 286 | int64_t existing_cl, new_cl; |
267 | 287 | |
268 | existing_cl = htp_parse_content_length(h_existing->value); | |
269 | new_cl = htp_parse_content_length(h->value); | |
288 | existing_cl = htp_parse_content_length(h_existing->value, NULL); | |
289 | new_cl = htp_parse_content_length(h->value, NULL); | |
270 | 290 | if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) { |
271 | 291 | // Ambiguous response C-L value. |
272 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Ambiguous response C-L value"); | |
273 | ||
274 | bstr_free(h->name); | |
275 | bstr_free(h->value); | |
276 | free(h); | |
277 | ||
278 | return HTP_ERROR; | |
292 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous response C-L value"); | |
279 | 293 | } |
280 | 294 | |
281 | 295 | // Ignoring the new C-L header that has the same value as the previous ones. |
41 | 41 | |
42 | 42 | static htp_status_t _htp_table_add(htp_table_t *table, const bstr *key, const void *element) { |
43 | 43 | // Add key. |
44 | if (htp_list_add(table->list, (void *)key) != HTP_OK) return HTP_ERROR; | |
44 | if (htp_list_add(&table->list, (void *)key) != HTP_OK) return HTP_ERROR; | |
45 | 45 | |
46 | 46 | // Add element. |
47 | if (htp_list_add(table->list, (void *)element) != HTP_OK) { | |
48 | htp_list_pop(table->list); | |
47 | if (htp_list_add(&table->list, (void *)element) != HTP_OK) { | |
48 | htp_list_pop(&table->list); | |
49 | 49 | return HTP_ERROR; |
50 | 50 | } |
51 | 51 | |
129 | 129 | // Free the table keys, but only if we're managing them. |
130 | 130 | if ((table->alloc_type == HTP_TABLE_KEYS_COPIED)||(table->alloc_type == HTP_TABLE_KEYS_ADOPTED)) { |
131 | 131 | bstr *key = NULL; |
132 | for (size_t i = 0, n = htp_list_size(table->list); i < n; i += 2) { | |
133 | key = htp_list_get(table->list, i); | |
132 | for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) { | |
133 | key = htp_list_get(&table->list, i); | |
134 | 134 | bstr_free(key); |
135 | 135 | } |
136 | 136 | } |
137 | 137 | |
138 | htp_list_clear(table->list); | |
138 | htp_list_clear(&table->list); | |
139 | 139 | } |
140 | 140 | |
141 | 141 | void htp_table_clear_ex(htp_table_t *table) { |
143 | 143 | |
144 | 144 | // This function does not free table keys. |
145 | 145 | |
146 | htp_list_clear(table->list); | |
146 | htp_list_clear(&table->list); | |
147 | 147 | } |
148 | 148 | |
149 | 149 | htp_table_t *htp_table_create(size_t size) { |
155 | 155 | table->alloc_type = HTP_TABLE_KEYS_ALLOC_UKNOWN; |
156 | 156 | |
157 | 157 | // Use a list behind the scenes. |
158 | table->list = htp_list_array_create(size * 2); | |
159 | if (table->list == NULL) { | |
158 | if (htp_list_init(&table->list, size * 2) == HTP_ERROR) { | |
160 | 159 | free(table); |
161 | 160 | return NULL; |
162 | 161 | } |
169 | 168 | |
170 | 169 | htp_table_clear(table); |
171 | 170 | |
172 | htp_list_destroy(table->list); | |
173 | table->list = NULL; | |
171 | htp_list_array_release(&table->list); | |
174 | 172 | |
175 | 173 | free(table); |
176 | 174 | } |
190 | 188 | |
191 | 189 | // Iterate through the list, comparing |
192 | 190 | // keys with the parameter, return data if found. |
193 | for (size_t i = 0, n = htp_list_size(table->list); i < n; i += 2) { | |
194 | bstr *key_candidate = htp_list_get(table->list, i); | |
195 | void *element = htp_list_get(table->list, i + 1); | |
191 | for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) { | |
192 | bstr *key_candidate = htp_list_get(&table->list, i); | |
193 | void *element = htp_list_get(&table->list, i + 1); | |
196 | 194 | if (bstr_cmp_nocase(key_candidate, key) == 0) { |
197 | 195 | return element; |
198 | 196 | } |
206 | 204 | |
207 | 205 | // Iterate through the list, comparing |
208 | 206 | // keys with the parameter, return data if found. |
209 | for (size_t i = 0, n = htp_list_size(table->list); i < n; i += 2) { | |
210 | bstr *key_candidate = htp_list_get(table->list, i); | |
211 | void *element = htp_list_get(table->list, i + 1); | |
212 | if (bstr_cmp_c_nocase(key_candidate, ckey) == 0) { | |
207 | for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) { | |
208 | bstr *key_candidate = htp_list_get(&table->list, i); | |
209 | void *element = htp_list_get(&table->list, i + 1); | |
210 | if (bstr_cmp_c_nocasenorzero(key_candidate, ckey) == 0) { | |
213 | 211 | return element; |
214 | 212 | } |
215 | 213 | } |
220 | 218 | void *htp_table_get_index(const htp_table_t *table, size_t idx, bstr **key) { |
221 | 219 | if (table == NULL) return NULL; |
222 | 220 | |
223 | if (idx >= htp_list_size(table->list)) return NULL; | |
221 | if (idx >= htp_list_size(&table->list)) return NULL; | |
224 | 222 | |
225 | 223 | if (key != NULL) { |
226 | *key = htp_list_get(table->list, idx * 2); | |
227 | } | |
228 | ||
229 | return htp_list_get(table->list, (idx * 2) + 1); | |
224 | *key = htp_list_get(&table->list, idx * 2); | |
225 | } | |
226 | ||
227 | return htp_list_get(&table->list, (idx * 2) + 1); | |
230 | 228 | } |
231 | 229 | |
232 | 230 | void *htp_table_get_mem(const htp_table_t *table, const void *key, size_t key_len) { |
234 | 232 | |
235 | 233 | // Iterate through the list, comparing |
236 | 234 | // keys with the parameter, return data if found. |
237 | for (size_t i = 0, n = htp_list_size(table->list); i < n; i += 2) { | |
238 | bstr *key_candidate = htp_list_get(table->list, i); | |
239 | void *element = htp_list_get(table->list, i + 1); | |
235 | for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) { | |
236 | bstr *key_candidate = htp_list_get(&table->list, i); | |
237 | void *element = htp_list_get(&table->list, i + 1); | |
240 | 238 | if (bstr_cmp_mem_nocase(key_candidate, key, key_len) == 0) { |
241 | 239 | return element; |
242 | 240 | } |
247 | 245 | |
248 | 246 | size_t htp_table_size(const htp_table_t *table) { |
249 | 247 | if (table == NULL) return 0; |
250 | return htp_list_size(table->list) / 2; | |
251 | } | |
248 | return htp_list_size(&table->list) / 2; | |
249 | } |
61 | 61 | |
62 | 62 | struct htp_table_t { |
63 | 63 | /** Table key and value pairs are stored in this list; name first, then value. */ |
64 | htp_list_t *list; | |
64 | htp_list_t list; | |
65 | 65 | |
66 | 66 | /** |
67 | 67 | * Key management strategy. Initially set to HTP_TABLE_KEYS_ALLOC_UKNOWN. The |
420 | 420 | } |
421 | 421 | |
422 | 422 | // Get the body length. |
423 | tx->request_content_length = htp_parse_content_length(cl->value); | |
423 | tx->request_content_length = htp_parse_content_length(cl->value, tx->connp); | |
424 | 424 | if (tx->request_content_length < 0) { |
425 | 425 | tx->request_transfer_coding = HTP_CODING_INVALID; |
426 | 426 | tx->flags |= HTP_REQUEST_INVALID_C_L; |
788 | 788 | // Invoke all callbacks. |
789 | 789 | htp_status_t rc = htp_res_run_hook_body_data(d->tx->connp, d); |
790 | 790 | if (rc != HTP_OK) return HTP_ERROR; |
791 | if (d->tx->response_entity_len > d->tx->connp->cfg->compression_bomb_limit && | |
792 | d->tx->response_entity_len > HTP_COMPRESSION_BOMB_RATIO * d->tx->response_message_len) { | |
793 | htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, | |
794 | "Compression bomb: decompressed %"PRId64" bytes out of %"PRId64, | |
795 | d->tx->response_entity_len, d->tx->response_message_len); | |
796 | return HTP_ERROR; | |
797 | } | |
791 | 798 | |
792 | 799 | return HTP_OK; |
793 | 800 | } |
821 | 828 | switch (tx->response_content_encoding_processing) { |
822 | 829 | case HTP_COMPRESSION_GZIP: |
823 | 830 | case HTP_COMPRESSION_DEFLATE: |
831 | case HTP_COMPRESSION_LZMA: | |
824 | 832 | // In severe memory stress these could be NULL |
825 | 833 | if (tx->connp->out_decompressor == NULL || tx->connp->out_decompressor->decompress == NULL) |
826 | 834 | return HTP_ERROR; |
1165 | 1173 | htp_header_t *ce = htp_table_get_c(tx->response_headers, "content-encoding"); |
1166 | 1174 | if (ce != NULL) { |
1167 | 1175 | /* fast paths: regular gzip and friends */ |
1168 | if ((bstr_cmp_c_nocase(ce->value, "gzip") == 0) || (bstr_cmp_c_nocase(ce->value, "x-gzip") == 0)) { | |
1176 | if ((bstr_cmp_c_nocasenorzero(ce->value, "gzip") == 0) || | |
1177 | (bstr_cmp_c_nocasenorzero(ce->value, "x-gzip") == 0)) { | |
1169 | 1178 | tx->response_content_encoding = HTP_COMPRESSION_GZIP; |
1170 | } else if ((bstr_cmp_c_nocase(ce->value, "deflate") == 0) || (bstr_cmp_c_nocase(ce->value, "x-deflate") == 0)) { | |
1179 | } else if ((bstr_cmp_c_nocasenorzero(ce->value, "deflate") == 0) || | |
1180 | (bstr_cmp_c_nocasenorzero(ce->value, "x-deflate") == 0)) { | |
1171 | 1181 | tx->response_content_encoding = HTP_COMPRESSION_DEFLATE; |
1172 | } else if (bstr_cmp_c_nocase(ce->value, "inflate") == 0) { | |
1182 | } else if (bstr_cmp_c_nocasenorzero(ce->value, "lzma") == 0) { | |
1183 | tx->response_content_encoding = HTP_COMPRESSION_LZMA; | |
1184 | } else if (bstr_cmp_c_nocasenorzero(ce->value, "inflate") == 0) { | |
1173 | 1185 | // ignore |
1174 | 1186 | } else { |
1175 | 1187 | /* exceptional cases: enter slow path */ |
1206 | 1218 | // supported algorithms. |
1207 | 1219 | if ((tx->response_content_encoding_processing == HTP_COMPRESSION_GZIP) || |
1208 | 1220 | (tx->response_content_encoding_processing == HTP_COMPRESSION_DEFLATE) || |
1221 | (tx->response_content_encoding_processing == HTP_COMPRESSION_LZMA) || | |
1209 | 1222 | ce_multi_comp) |
1210 | 1223 | { |
1211 | 1224 | if (tx->connp->out_decompressor != NULL) { |
1267 | 1280 | "C-E deflate has abnormal value"); |
1268 | 1281 | } |
1269 | 1282 | cetype = HTP_COMPRESSION_DEFLATE; |
1283 | } else if (bstr_util_cmp_mem(tok, tok_len, "lzma", 4) == 0) { | |
1284 | cetype = HTP_COMPRESSION_LZMA; | |
1270 | 1285 | } else if (bstr_util_cmp_mem(tok, tok_len, "inflate", 7) == 0) { |
1271 | 1286 | cetype = HTP_COMPRESSION_NONE; |
1272 | 1287 | } else { |
115 | 115 | } |
116 | 116 | |
117 | 117 | /** |
118 | * Remove all line terminators (LF or CRLF) from | |
118 | * Remove all line terminators (LF, CR or CRLF) from | |
119 | 119 | * the end of the line provided as input. |
120 | 120 | * |
121 | 121 | * @return 0 if nothing was removed, 1 if one or more LF characters were removed, or |
138 | 138 | (*len)--; |
139 | 139 | r = 2; |
140 | 140 | } |
141 | } else if (data[*len - 1] == CR) { | |
142 | (*len)--; | |
143 | r = 1; | |
141 | 144 | } else return r; |
142 | 145 | } |
143 | 146 | |
208 | 211 | } |
209 | 212 | |
210 | 213 | /** |
211 | * Is the given line empty? This function expects the line to have | |
212 | * a terminating LF. | |
214 | * Is the given line empty? | |
213 | 215 | * |
214 | 216 | * @param[in] data |
215 | 217 | * @param[in] len |
216 | 218 | * @return 0 or 1 |
217 | 219 | */ |
218 | 220 | int htp_is_line_empty(unsigned char *data, size_t len) { |
219 | if ((len == 1) || ((len == 2) && (data[0] == CR))) { | |
221 | if ((len == 1) || | |
222 | ((len == 2) && (data[0] == CR) && (data[1] == LF))) { | |
220 | 223 | return 1; |
221 | 224 | } |
222 | 225 | |
249 | 252 | * @param[in] b |
250 | 253 | * @return Content-Length as a number, or -1 on error. |
251 | 254 | */ |
252 | int64_t htp_parse_content_length(bstr *b) { | |
253 | return htp_parse_positive_integer_whitespace((unsigned char *) bstr_ptr(b), bstr_len(b), 10); | |
/*
 * Lenient Content-Length parser (added lines of this hunk).
 *
 * Returns -1003 for an empty value and -1001 when the value contains no
 * decimal digit at all; otherwise returns whatever bstr_util_mem_to_pint()
 * yields for the digits that start at the first digit found.
 *
 * connp may be NULL; in that case nothing is logged.
 */
255 | int64_t htp_parse_content_length(bstr *b, htp_connp_t *connp) { | |
256 | size_t len = bstr_len(b); | |
257 | unsigned char * data = (unsigned char *) bstr_ptr(b); | |
258 | size_t pos = 0; | |
259 | int64_t r = 0; | |
260 | ||
261 | if (len == 0) return -1003; | |
262 | ||
263 | // Ignore junk before | |
/* Skip every leading non-digit byte. r doubles as a "warning already logged"
   flag here: it is set to -1 after the first log call so that the warning for
   non-LWS junk is emitted at most once. */
264 | while ((pos < len) && (data[pos] < '0' || data[pos] > '9')) { | |
265 | if (!htp_is_lws(data[pos]) && connp != NULL && r == 0) { | |
266 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, | |
267 | "C-L value with extra data in the beginnning"); | |
268 | r = -1; | |
269 | } | |
270 | pos++; | |
271 | } | |
272 | if (pos == len) return -1001; | |
273 | ||
/* NOTE(review): pos is passed as the last (output) argument and is therefore
   overwritten by the call. If the helper reports an offset relative to
   data + pos rather than to data, the "pos < len" test below would also fire
   for values that only had leading junk -- confirm against
   bstr_util_mem_to_pint(). */
274 | r = bstr_util_mem_to_pint(data + pos, len - pos, 10, &pos); | |
275 | // Ok to have junk afterwards | |
276 | if (pos < len && connp != NULL) { | |
277 | htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, | |
278 | "C-L value with extra data in the end"); | |
279 | } | |
280 | return r; | |
254 | 281 | } |
255 | 282 | |
256 | 283 | /** |
424 | 451 | } |
425 | 452 | |
/*
 * Returns 1 when c is treated as a header-folding character, 0 otherwise.
 * The removed row accepted only bytes for which htp_is_lws(c) is true; the
 * added row additionally accepts a NUL byte (c == 0).
 */
426 | 453 | int htp_is_folding_char(int c) { |
427 | if (htp_is_lws(c)) return 1; | |
454 | if (htp_is_lws(c) || c == 0) return 1; | |
428 | 455 | else return 0; |
429 | 456 | } |
430 | 457 | |
450 | 477 | // Treat an empty line as terminator |
451 | 478 | if (htp_is_line_empty(data, len)) { |
452 | 479 | return 1; |
480 | } | |
481 | // Only space is terminator if terminator does not follow right away | |
482 | if (len == 2 && htp_is_lws(data[0]) && data[1] == LF) { | |
483 | if (connp->out_current_read_offset < connp->out_current_len && | |
484 | connp->out_current_data[connp->out_current_read_offset] != LF) { | |
485 | return 1; | |
486 | } | |
453 | 487 | } |
454 | 488 | break; |
455 | 489 | } |
2365 | 2399 | // Firefox 3.5.x: (?i)^\s*http |
2366 | 2400 | // IE: (?i)^\s*http\s*/ |
2367 | 2401 | // Safari: ^HTTP/\d+\.\d+\s+\d{3} |
2402 | size_t pos = 0; | |
2368 | 2403 | |
2369 | 2404 | if (data == NULL) return 1; |
2370 | if (len < 4) return 1; | |
2371 | ||
2372 | if ((data[0] != 'H') && (data[0] != 'h')) return 1; | |
2373 | if ((data[1] != 'T') && (data[1] != 't')) return 1; | |
2374 | if ((data[2] != 'T') && (data[2] != 't')) return 1; | |
2375 | if ((data[3] != 'P') && (data[3] != 'p')) return 1; | |
2405 | while ((pos < len) && (htp_is_space(data[pos]) || data[pos] == 0)) pos++; | |
2406 | ||
2407 | if (len < pos + 4) return 1; | |
2408 | ||
2409 | if ((data[pos] != 'H') && (data[pos] != 'h')) return 1; | |
2410 | if ((data[pos+1] != 'T') && (data[pos+1] != 't')) return 1; | |
2411 | if ((data[pos+2] != 'T') && (data[pos+2] != 't')) return 1; | |
2412 | if ((data[pos+3] != 'P') && (data[pos+3] != 'p')) return 1; | |
2376 | 2413 | |
2377 | 2414 | return 0; |
2378 | 2415 | } |
0 | /* 7zTypes.h -- Basic types | |
1 | 2018-08-04 : Igor Pavlov : Public domain */ | |
2 | ||
3 | #ifndef __7Z_TYPES_H | |
4 | #define __7Z_TYPES_H | |
5 | ||
6 | #ifdef _WIN32 | |
7 | /* #include <windows.h> */ | |
8 | #endif | |
9 | ||
10 | #include <stddef.h> | |
11 | #include <zconf.h> | |
12 | ||
13 | #ifndef EXTERN_C_BEGIN | |
14 | #ifdef __cplusplus | |
15 | #define EXTERN_C_BEGIN extern "C" { | |
16 | #define EXTERN_C_END } | |
17 | #else | |
18 | #define EXTERN_C_BEGIN | |
19 | #define EXTERN_C_END | |
20 | #endif | |
21 | #endif | |
22 | ||
23 | EXTERN_C_BEGIN | |
24 | ||
25 | #define SZ_OK 0 | |
26 | ||
27 | #define SZ_ERROR_DATA 1 | |
28 | #define SZ_ERROR_MEM 2 | |
29 | #define SZ_ERROR_CRC 3 | |
30 | #define SZ_ERROR_UNSUPPORTED 4 | |
31 | #define SZ_ERROR_PARAM 5 | |
32 | #define SZ_ERROR_INPUT_EOF 6 | |
33 | #define SZ_ERROR_OUTPUT_EOF 7 | |
34 | #define SZ_ERROR_READ 8 | |
35 | #define SZ_ERROR_WRITE 9 | |
36 | #define SZ_ERROR_PROGRESS 10 | |
37 | #define SZ_ERROR_FAIL 11 | |
38 | #define SZ_ERROR_THREAD 12 | |
39 | ||
40 | #define SZ_ERROR_ARCHIVE 16 | |
41 | #define SZ_ERROR_NO_ARCHIVE 17 | |
42 | ||
43 | typedef int SRes; | |
44 | ||
45 | ||
46 | #ifdef _WIN32 | |
47 | ||
48 | /* typedef DWORD WRes; */ | |
49 | typedef unsigned WRes; | |
50 | #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) | |
51 | ||
52 | #else | |
53 | ||
54 | typedef int WRes; | |
55 | #define MY__FACILITY_WIN32 7 | |
56 | #define MY__FACILITY__WRes MY__FACILITY_WIN32 | |
57 | #define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) | |
58 | ||
59 | #endif | |
60 | ||
61 | ||
/* RINOK(x): evaluates x exactly once into a local int and, when the result is
   non-zero, returns that value from the enclosing function. Only defined if
   the including code has not already provided its own RINOK. */
62 | #ifndef RINOK | |
63 | #define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } | |
64 | #endif | |
65 | ||
66 | typedef short Int16; | |
67 | typedef unsigned short UInt16; | |
68 | ||
69 | #ifdef _LZMA_UINT32_IS_ULONG | |
70 | typedef long Int32; | |
71 | typedef unsigned long UInt32; | |
72 | #else | |
73 | typedef int Int32; | |
74 | typedef unsigned int UInt32; | |
75 | #endif | |
76 | ||
77 | #ifdef _SZ_NO_INT_64 | |
78 | ||
79 | /* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. | |
80 | NOTES: Some code will work incorrectly in that case! */ | |
81 | ||
82 | typedef long Int64; | |
83 | typedef unsigned long UInt64; | |
84 | ||
85 | #else | |
86 | ||
87 | #if defined(_MSC_VER) || defined(__BORLANDC__) | |
88 | typedef __int64 Int64; | |
89 | typedef unsigned __int64 UInt64; | |
90 | #define UINT64_CONST(n) n | |
91 | #else | |
92 | typedef long long int Int64; | |
93 | typedef unsigned long long int UInt64; | |
94 | #define UINT64_CONST(n) n ## ULL | |
95 | #endif | |
96 | ||
97 | #endif | |
98 | ||
99 | #ifdef _LZMA_NO_SYSTEM_SIZE_T | |
100 | typedef UInt32 SizeT; | |
101 | #else | |
102 | typedef size_t SizeT; | |
103 | #endif | |
104 | ||
105 | typedef int BoolInt; | |
106 | /* typedef BoolInt Bool; */ | |
107 | #define True 1 | |
108 | #define False 0 | |
109 | ||
110 | ||
111 | #ifdef _WIN32 | |
112 | #define MY_STD_CALL __stdcall | |
113 | #else | |
114 | #define MY_STD_CALL | |
115 | #endif | |
116 | ||
117 | #ifdef _MSC_VER | |
118 | ||
119 | #if _MSC_VER >= 1300 | |
120 | #define MY_NO_INLINE __declspec(noinline) | |
121 | #else | |
122 | #define MY_NO_INLINE | |
123 | #endif | |
124 | ||
125 | #define MY_FORCE_INLINE __forceinline | |
126 | ||
127 | #define MY_CDECL __cdecl | |
128 | #define MY_FAST_CALL __fastcall | |
129 | ||
130 | #else | |
131 | ||
132 | #define MY_NO_INLINE | |
133 | #define MY_FORCE_INLINE | |
134 | #define MY_CDECL | |
135 | #define MY_FAST_CALL | |
136 | ||
137 | /* inline keyword : for C++ / C99 */ | |
138 | ||
139 | /* GCC, clang: */ | |
140 | /* | |
141 | #if defined (__GNUC__) && (__GNUC__ >= 4) | |
142 | #define MY_FORCE_INLINE __attribute__((always_inline)) | |
143 | #define MY_NO_INLINE __attribute__((noinline)) | |
144 | #endif | |
145 | */ | |
146 | ||
147 | #endif | |
148 | ||
149 | ||
150 | /* The following interfaces use first parameter as pointer to structure */ | |
151 | ||
152 | typedef struct IByteIn IByteIn; | |
153 | struct IByteIn | |
154 | { | |
155 | Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ | |
156 | }; | |
157 | #define IByteIn_Read(p) (p)->Read(p) | |
158 | ||
159 | ||
160 | typedef struct IByteOut IByteOut; | |
161 | struct IByteOut | |
162 | { | |
163 | void (*Write)(const IByteOut *p, Byte b); | |
164 | }; | |
165 | #define IByteOut_Write(p, b) (p)->Write(p, b) | |
166 | ||
167 | ||
168 | typedef struct ISeqInStream ISeqInStream; | |
169 | struct ISeqInStream | |
170 | { | |
171 | SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); | |
172 | /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. | |
173 | (output(*size) < input(*size)) is allowed */ | |
174 | }; | |
175 | #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) | |
176 | ||
177 | /* it can return SZ_ERROR_INPUT_EOF */ | |
178 | SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); | |
179 | SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); | |
180 | SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); | |
181 | ||
182 | ||
183 | typedef struct ISeqOutStream ISeqOutStream; | |
184 | struct ISeqOutStream | |
185 | { | |
186 | size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); | |
187 | /* Returns: result - the number of actually written bytes. | |
188 | (result < size) means error */ | |
189 | }; | |
190 | #define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) | |
191 | ||
192 | typedef enum | |
193 | { | |
194 | SZ_SEEK_SET = 0, | |
195 | SZ_SEEK_CUR = 1, | |
196 | SZ_SEEK_END = 2 | |
197 | } ESzSeek; | |
198 | ||
199 | ||
200 | typedef struct ISeekInStream ISeekInStream; | |
201 | struct ISeekInStream | |
202 | { | |
203 | SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ | |
204 | SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); | |
205 | }; | |
206 | #define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) | |
207 | #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) | |
208 | ||
209 | ||
210 | typedef struct ILookInStream ILookInStream; | |
211 | struct ILookInStream | |
212 | { | |
213 | SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); | |
214 | /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. | |
215 | (output(*size) > input(*size)) is not allowed | |
216 | (output(*size) < input(*size)) is allowed */ | |
217 | SRes (*Skip)(const ILookInStream *p, size_t offset); | |
218 | /* offset must be <= output(*size) of Look */ | |
219 | ||
220 | SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); | |
221 | /* reads directly (without buffer). It's same as ISeqInStream::Read */ | |
222 | SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); | |
223 | }; | |
224 | ||
225 | #define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) | |
226 | #define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) | |
227 | #define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) | |
228 | #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) | |
229 | ||
230 | ||
231 | SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); | |
232 | SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); | |
233 | ||
234 | /* reads via ILookInStream::Read */ | |
235 | SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); | |
236 | SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); | |
237 | ||
238 | ||
239 | ||
240 | typedef struct | |
241 | { | |
242 | ILookInStream vt; | |
243 | const ISeekInStream *realStream; | |
244 | ||
245 | size_t pos; | |
246 | size_t size; /* it's data size */ | |
247 | ||
248 | /* the following variables must be set outside */ | |
249 | Byte *buf; | |
250 | size_t bufSize; | |
251 | } CLookToRead2; | |
252 | ||
253 | void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); | |
254 | ||
255 | #define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } | |
256 | ||
257 | ||
258 | typedef struct | |
259 | { | |
260 | ISeqInStream vt; | |
261 | const ILookInStream *realStream; | |
262 | } CSecToLook; | |
263 | ||
264 | void SecToLook_CreateVTable(CSecToLook *p); | |
265 | ||
266 | ||
267 | ||
268 | typedef struct | |
269 | { | |
270 | ISeqInStream vt; | |
271 | const ILookInStream *realStream; | |
272 | } CSecToRead; | |
273 | ||
274 | void SecToRead_CreateVTable(CSecToRead *p); | |
275 | ||
276 | ||
277 | typedef struct ICompressProgress ICompressProgress; | |
278 | ||
279 | struct ICompressProgress | |
280 | { | |
281 | SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); | |
282 | /* Returns: result. (result != SZ_OK) means break. | |
283 | Value (UInt64)(Int64)-1 for size means unknown value. */ | |
284 | }; | |
285 | #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) | |
286 | ||
287 | ||
288 | ||
289 | typedef struct ISzAlloc ISzAlloc; | |
290 | typedef const ISzAlloc * ISzAllocPtr; | |
291 | ||
292 | struct ISzAlloc | |
293 | { | |
294 | void *(*Alloc)(ISzAllocPtr p, size_t size); | |
295 | void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ | |
296 | }; | |
297 | ||
298 | #define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) | |
299 | #define ISzAlloc_Free(p, a) (p)->Free(p, a) | |
300 | ||
301 | /* deprecated */ | |
302 | #define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) | |
303 | #define IAlloc_Free(p, a) ISzAlloc_Free(p, a) | |
304 | ||
305 | ||
306 | ||
307 | ||
308 | ||
309 | #ifndef MY_offsetof | |
310 | #ifdef offsetof | |
311 | #define MY_offsetof(type, m) offsetof(type, m) | |
312 | /* | |
313 | #define MY_offsetof(type, m) FIELD_OFFSET(type, m) | |
314 | */ | |
315 | #else | |
316 | #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) | |
317 | #endif | |
318 | #endif | |
319 | ||
320 | ||
321 | ||
322 | #ifndef MY_container_of | |
323 | ||
324 | /* | |
325 | #define MY_container_of(ptr, type, m) container_of(ptr, type, m) | |
326 | #define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) | |
327 | #define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) | |
328 | #define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) | |
329 | */ | |
330 | ||
331 | /* | |
332 | GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" | |
333 | GCC 3.4.4 : classes with constructor | |
334 | GCC 4.8.1 : classes with non-public variable members" | |
335 | */ | |
336 | ||
/* Active definition: yields a (type *) obtained by subtracting the byte offset
   of member m from ptr. The condition of the ?: is the constant 1, so the
   value is always ptr; the second arm, &((type *)0)->m, is never evaluated.
   Presumably it is there so the compiler checks that ptr has the same pointer
   type as a pointer to member m -- confirm with upstream notes. */
337 | #define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) | |
338 | ||
339 | ||
340 | #endif | |
341 | ||
342 | #define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) | |
343 | ||
344 | /* | |
345 | #define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) | |
346 | */ | |
347 | #define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) | |
348 | ||
349 | #define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) | |
350 | /* | |
351 | #define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) | |
352 | */ | |
353 | ||
354 | ||
355 | ||
356 | #ifdef _WIN32 | |
357 | ||
358 | #define CHAR_PATH_SEPARATOR '\\' | |
359 | #define WCHAR_PATH_SEPARATOR L'\\' | |
360 | #define STRING_PATH_SEPARATOR "\\" | |
361 | #define WSTRING_PATH_SEPARATOR L"\\" | |
362 | ||
363 | #else | |
364 | ||
365 | #define CHAR_PATH_SEPARATOR '/' | |
366 | #define WCHAR_PATH_SEPARATOR L'/' | |
367 | #define STRING_PATH_SEPARATOR "/" | |
368 | #define WSTRING_PATH_SEPARATOR L"/" | |
369 | ||
370 | #endif | |
371 | ||
372 | EXTERN_C_END | |
373 | ||
374 | #endif |
0 | /* Compiler.h | |
1 | 2017-04-03 : Igor Pavlov : Public domain */ | |
2 | ||
3 | #ifndef __7Z_COMPILER_H | |
4 | #define __7Z_COMPILER_H | |
5 | ||
6 | #ifdef _MSC_VER | |
7 | ||
8 | #ifdef UNDER_CE | |
9 | #define RPC_NO_WINDOWS_H | |
10 | /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ | |
11 | #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union | |
12 | #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int | |
13 | #endif | |
14 | ||
15 | #if _MSC_VER >= 1300 | |
16 | #pragma warning(disable : 4996) // This function or variable may be unsafe | |
17 | #else | |
18 | #pragma warning(disable : 4511) // copy constructor could not be generated | |
19 | #pragma warning(disable : 4512) // assignment operator could not be generated | |
20 | #pragma warning(disable : 4514) // unreferenced inline function has been removed | |
21 | #pragma warning(disable : 4702) // unreachable code | |
22 | #pragma warning(disable : 4710) // not inlined | |
23 | #pragma warning(disable : 4714) // function marked as __forceinline not inlined | |
24 | #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information | |
25 | #endif | |
26 | ||
27 | #endif | |
28 | ||
29 | #define UNUSED_VAR(x) (void)x; | |
30 | /* #define UNUSED_VAR(x) x=x; */ | |
31 | ||
32 | #endif |
0 | /* LzFind.c -- Match finder for LZ algorithms | |
1 | 2018-07-08 : Igor Pavlov : Public domain */ | |
2 | ||
3 | #include "Precomp.h" | |
4 | ||
5 | #include <string.h> | |
6 | ||
7 | #include "LzFind.h" | |
8 | #include "LzHash.h" | |
9 | ||
10 | #define kEmptyHashValue 0 | |
11 | #define kMaxValForNormalize ((UInt32)0xFFFFFFFF) | |
12 | #define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ | |
13 | #define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) | |
14 | #define kMaxHistorySize ((UInt32)7 << 29) | |
15 | ||
16 | #define kStartMaxLen 3 | |
17 | ||
/* Releases the input window buffer through alloc and clears the pointer.
   Does nothing when p->directInput is set, i.e. the buffer is only freed on
   the non-direct-input path. */
18 | static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) | |
19 | { | |
20 | if (!p->directInput) | |
21 | { | |
22 | ISzAlloc_Free(alloc, p->bufferBase); | |
23 | p->bufferBase = NULL; | |
24 | } | |
25 | } | |
26 | ||
27 | /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ | |
28 | ||
/* Sizes (and, if needed, allocates) the input window.
   blockSize is keepSizeBefore + keepSizeAfter + keepSizeReserv, computed in
   UInt32 arithmetic. With directInput set, only p->blockSize is recorded and 1
   is returned. Otherwise the buffer is re-allocated when none exists yet or
   the size changed. Returns non-zero iff p->bufferBase is non-NULL afterwards. */
29 | static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc) | |
30 | { | |
31 | UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; | |
32 | if (p->directInput) | |
33 | { | |
34 | p->blockSize = blockSize; | |
35 | return 1; | |
36 | } | |
37 | if (!p->bufferBase || p->blockSize != blockSize) | |
38 | { | |
39 | LzInWindow_Free(p, alloc); | |
40 | p->blockSize = blockSize; | |
41 | p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize); | |
42 | } | |
43 | return (p->bufferBase != NULL); | |
44 | } | |
45 | ||
/* Accessor: returns p->buffer unchanged. */
46 | Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } | |
47 | ||
/* Returns p->streamPos - p->pos (UInt32 arithmetic). */
48 | static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } | |
49 | ||
/* Subtracts subValue from the three position counters posLimit, pos and
   streamPos, leaving their mutual differences unchanged. */
50 | void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) | |
51 | { | |
52 | p->posLimit -= subValue; | |
53 | p->pos -= subValue; | |
54 | p->streamPos -= subValue; | |
55 | } | |
56 | ||
/* Makes more input available after the current position.
   No-op once the stream end was reached or p->result holds an error.
   Direct-input path: no copying; streamPos is advanced by at most
   directInputRem bytes (and at most 0xFFFFFFFF minus the bytes already
   available), and the end flag is set when directInputRem reaches 0.
   Stream path: repeatedly reads into the free tail of the window until the
   window is full, the read fails, the stream reports 0 bytes (end), or more
   than keepSizeAfter bytes are available ahead of pos. */
57 | static void MatchFinder_ReadBlock(CMatchFinder *p) | |
58 | { | |
59 | if (p->streamEndWasReached || p->result != SZ_OK) | |
60 | return; | |
61 | ||
62 | /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */ | |
63 | ||
64 | if (p->directInput) | |
65 | { | |
66 | UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos); | |
67 | if (curSize > p->directInputRem) | |
68 | curSize = (UInt32)p->directInputRem; | |
69 | p->directInputRem -= curSize; | |
70 | p->streamPos += curSize; | |
71 | if (p->directInputRem == 0) | |
72 | p->streamEndWasReached = 1; | |
73 | return; | |
74 | } | |
75 | ||
76 | for (;;) | |
77 | { | |
/* dest: first byte after the data already buffered; size: room left between
   dest and the end of the allocated block. */
78 | Byte *dest = p->buffer + (p->streamPos - p->pos); | |
79 | size_t size = (p->bufferBase + p->blockSize - dest); | |
80 | if (size == 0) | |
81 | return; | |
82 | ||
83 | p->result = ISeqInStream_Read(p->stream, dest, &size); | |
84 | if (p->result != SZ_OK) | |
85 | return; | |
86 | if (size == 0) | |
87 | { | |
88 | p->streamEndWasReached = 1; | |
89 | return; | |
90 | } | |
91 | p->streamPos += (UInt32)size; | |
92 | if (p->streamPos - p->pos > p->keepSizeAfter) | |
93 | return; | |
94 | } | |
95 | } | |
96 | ||
/* Slides the live part of the window to the start of the allocation:
   copies keepSizeBefore bytes of history plus the (streamPos - pos) bytes
   ahead of the current position down to bufferBase (memmove, so overlap is
   fine), then repoints p->buffer at bufferBase + keepSizeBefore. */
97 | void MatchFinder_MoveBlock(CMatchFinder *p) | |
98 | { | |
99 | memmove(p->bufferBase, | |
100 | p->buffer - p->keepSizeBefore, | |
101 | (size_t)(p->streamPos - p->pos) + p->keepSizeBefore); | |
102 | p->buffer = p->bufferBase + p->keepSizeBefore; | |
103 | } | |
104 | ||
/* Returns non-zero when the distance from p->buffer to the end of the
   allocated block is at most keepSizeAfter. Always 0 in direct-input mode. */
105 | int MatchFinder_NeedMove(CMatchFinder *p) | |
106 | { | |
107 | if (p->directInput) | |
108 | return 0; | |
109 | /* if (p->streamEndWasReached) return 0; */ | |
110 | return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); | |
111 | } | |
112 | ||
/* Calls MatchFinder_ReadBlock() when the stream has not ended and the number
   of bytes available ahead of pos is no larger than keepSizeAfter. */
113 | void MatchFinder_ReadIfRequired(CMatchFinder *p) | |
114 | { | |
115 | if (p->streamEndWasReached) | |
116 | return; | |
117 | if (p->keepSizeAfter >= p->streamPos - p->pos) | |
118 | MatchFinder_ReadBlock(p); | |
119 | } | |
120 | ||
/* Compacts the window first if MatchFinder_NeedMove() says so, then always
   attempts to read more input. */
121 | static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) | |
122 | { | |
123 | if (MatchFinder_NeedMove(p)) | |
124 | MatchFinder_MoveBlock(p); | |
125 | MatchFinder_ReadBlock(p); | |
126 | } | |
127 | ||
/* Default tuning values: cutValue 32, btMode on, 4 hash bytes, bigHash off. */
128 | static void MatchFinder_SetDefaultSettings(CMatchFinder *p) | |
129 | { | |
130 | p->cutValue = 32; | |
131 | p->btMode = 1; | |
132 | p->numHashBytes = 4; | |
133 | p->bigHash = 0; | |
134 | } | |
135 | ||
136 | #define kCrcPoly 0xEDB88320 | |
137 | ||
/* Puts a CMatchFinder into its initial, unallocated state: no window buffer,
   no hash table, stream-input mode, expectedDataSize set to all-ones
   ((UInt64)(Int64)-1), default settings applied. Also fills the 256-entry
   p->crc table: each entry is its index run through 8 shift-right/xor steps
   with kCrcPoly. */
138 | void MatchFinder_Construct(CMatchFinder *p) | |
139 | { | |
140 | unsigned i; | |
141 | p->bufferBase = NULL; | |
142 | p->directInput = 0; | |
143 | p->hash = NULL; | |
144 | p->expectedDataSize = (UInt64)(Int64)-1; | |
145 | MatchFinder_SetDefaultSettings(p); | |
146 | ||
147 | for (i = 0; i < 256; i++) | |
148 | { | |
149 | UInt32 r = (UInt32)i; | |
150 | unsigned j; | |
151 | for (j = 0; j < 8; j++) | |
/* (UInt32)0 - (r & 1) is all-ones when the low bit is set and 0 otherwise, so
   the polynomial is xor-ed in only for a set low bit, without a branch. */
152 | r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); | |
153 | p->crc[i] = r; | |
154 | } | |
155 | } | |
156 | ||
/* Frees the hash/reference table through alloc and clears p->hash. The input
   window is not touched here. */
157 | static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) | |
158 | { | |
159 | ISzAlloc_Free(alloc, p->hash); | |
160 | p->hash = NULL; | |
161 | } | |
162 | ||
/* Releases both allocations owned by the match finder: the hash/reference
   table and (outside direct-input mode) the input window. */
163 | void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) | |
164 | { | |
165 | MatchFinder_FreeThisClassMemory(p, alloc); | |
166 | LzInWindow_Free(p, alloc); | |
167 | } | |
168 | ||
/* Allocates room for num CLzRef entries through alloc. The divide-back check
   detects size_t overflow of num * sizeof(CLzRef); in that case NULL is
   returned without calling the allocator. */
169 | static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) | |
170 | { | |
171 | size_t sizeInBytes = (size_t)num * sizeof(CLzRef); | |
172 | if (sizeInBytes / sizeof(CLzRef) != num) | |
173 | return NULL; | |
174 | return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); | |
175 | } | |
176 | ||
/* Sizes and allocates everything the match finder needs for the given
   history size and look-ahead parameters.
   Returns 1 on success. On any failure (historySize above kMaxHistorySize,
   window allocation failure, reference-table allocation failure) all memory
   is released via MatchFinder_Free() and 0 is returned.
   An existing reference table is reused when its entry count (p->numRefs)
   already equals the newly computed size. */
177 | int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, | |
178 | UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, | |
179 | ISzAllocPtr alloc) | |
180 | { | |
181 | UInt32 sizeReserv; | |
182 | ||
183 | if (historySize > kMaxHistorySize) | |
184 | { | |
185 | MatchFinder_Free(p, alloc); | |
186 | return 0; | |
187 | } | |
188 | ||
/* Reserve is half the history by default, a quarter from 2 GiB and an eighth
   from 3 GiB upward, plus half of the look-ahead sizes and a fixed 512 KiB. */
189 | sizeReserv = historySize >> 1; | |
190 | if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3; | |
191 | else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2; | |
192 | ||
193 | sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); | |
194 | ||
195 | p->keepSizeBefore = historySize + keepAddBufferBefore + 1; | |
196 | p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; | |
197 | ||
198 | /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ | |
199 | ||
200 | if (LzInWindow_Create(p, sizeReserv, alloc)) | |
201 | { | |
202 | UInt32 newCyclicBufferSize = historySize + 1; | |
203 | UInt32 hs; | |
204 | p->matchMaxLen = matchMaxLen; | |
205 | { | |
206 | p->fixedHashSize = 0; | |
207 | if (p->numHashBytes == 2) | |
208 | hs = (1 << 16) - 1; | |
209 | else | |
210 | { | |
/* Derive the main hash mask from min(historySize, expectedDataSize): the
   or/shift sequence smears the high bits downward, the result is halved and
   then forced to at least 0xFFFF. */
211 | hs = historySize; | |
212 | if (hs > p->expectedDataSize) | |
213 | hs = (UInt32)p->expectedDataSize; | |
214 | if (hs != 0) | |
215 | hs--; | |
216 | hs |= (hs >> 1); | |
217 | hs |= (hs >> 2); | |
218 | hs |= (hs >> 4); | |
219 | hs |= (hs >> 8); | |
220 | hs >>= 1; | |
221 | hs |= 0xFFFF; /* don't change it! It's required for Deflate */ | |
222 | if (hs > (1 << 24)) | |
223 | { | |
224 | if (p->numHashBytes == 3) | |
225 | hs = (1 << 24) - 1; | |
226 | else | |
227 | hs >>= 1; | |
228 | /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ | |
229 | } | |
230 | } | |
/* hs is a mask up to here; after the increment it is an entry count, to which
   the fixed-size tables for the shorter hashes are added. */
231 | p->hashMask = hs; | |
232 | hs++; | |
233 | if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; | |
234 | if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; | |
235 | if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; | |
236 | hs += p->fixedHashSize; | |
237 | } | |
238 | ||
239 | { | |
240 | size_t newSize; | |
241 | size_t numSons; | |
242 | p->historySize = historySize; | |
243 | p->hashSizeSum = hs; | |
244 | p->cyclicBufferSize = newCyclicBufferSize; | |
245 | ||
/* One "son" entry per cyclic-buffer slot, two per slot when btMode is set. */
246 | numSons = newCyclicBufferSize; | |
247 | if (p->btMode) | |
248 | numSons <<= 1; | |
249 | newSize = hs + numSons; | |
250 | ||
251 | if (p->hash && p->numRefs == newSize) | |
252 | return 1; | |
253 | ||
254 | MatchFinder_FreeThisClassMemory(p, alloc); | |
255 | p->numRefs = newSize; | |
256 | p->hash = AllocRefs(newSize, alloc); | |
257 | ||
258 | if (p->hash) | |
259 | { | |
/* The son array lives in the same allocation, directly after the hash tables. */
260 | p->son = p->hash + p->hashSizeSum; | |
261 | return 1; | |
262 | } | |
263 | } | |
264 | } | |
265 | ||
266 | MatchFinder_Free(p, alloc); | |
267 | return 0; | |
268 | } | |
269 | ||
/* Recomputes p->posLimit and p->lenLimit.
   posLimit is pos plus the smallest of three distances: up to
   kMaxValForNormalize, up to the end of the cyclic buffer, and a look-ahead
   distance. The look-ahead distance is the available bytes minus
   keepSizeAfter, or, when no more than keepSizeAfter bytes are available,
   1 if any byte is available and 0 otherwise.
   lenLimit is the available byte count capped at matchMaxLen. */
270 | static void MatchFinder_SetLimits(CMatchFinder *p) | |
271 | { | |
272 | UInt32 limit = kMaxValForNormalize - p->pos; | |
273 | UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; | |
274 | ||
275 | if (limit2 < limit) | |
276 | limit = limit2; | |
277 | limit2 = p->streamPos - p->pos; | |
278 | ||
279 | if (limit2 <= p->keepSizeAfter) | |
280 | { | |
281 | if (limit2 > 0) | |
282 | limit2 = 1; | |
283 | } | |
284 | else | |
285 | limit2 -= p->keepSizeAfter; | |
286 | ||
287 | if (limit2 < limit) | |
288 | limit = limit2; | |
289 | ||
290 | { | |
291 | UInt32 lenLimit = p->streamPos - p->pos; | |
292 | if (lenLimit > p->matchMaxLen) | |
293 | lenLimit = p->matchMaxLen; | |
294 | p->lenLimit = lenLimit; | |
295 | } | |
296 | p->posLimit = p->pos + limit; | |
297 | } | |
298 | ||
299 | ||
/* Clears the first p->fixedHashSize entries of p->hash to kEmptyHashValue. */
300 | void MatchFinder_Init_LowHash(CMatchFinder *p) | |
301 | { | |
302 | size_t i; | |
303 | CLzRef *items = p->hash; | |
304 | size_t numItems = p->fixedHashSize; | |
305 | for (i = 0; i < numItems; i++) | |
306 | items[i] = kEmptyHashValue; | |
307 | } | |
308 | ||
309 | ||
/* Clears the hashMask + 1 entries of p->hash that follow the first
   p->fixedHashSize entries to kEmptyHashValue. */
310 | void MatchFinder_Init_HighHash(CMatchFinder *p) | |
311 | { | |
312 | size_t i; | |
313 | CLzRef *items = p->hash + p->fixedHashSize; | |
314 | size_t numItems = (size_t)p->hashMask + 1; | |
315 | for (i = 0; i < numItems; i++) | |
316 | items[i] = kEmptyHashValue; | |
317 | } | |
318 | ||
319 | ||
/* Resets the position state: cyclicBufferPos to 0, buffer to the start of the
   allocation, pos and streamPos both to cyclicBufferSize, result to SZ_OK and
   the end-of-stream flag to 0. When readData is non-zero an initial block is
   read before the limits are recomputed. */
320 | void MatchFinder_Init_3(CMatchFinder *p, int readData) | |
321 | { | |
322 | p->cyclicBufferPos = 0; | |
323 | p->buffer = p->bufferBase; | |
324 | p->pos = | |
325 | p->streamPos = p->cyclicBufferSize; | |
326 | p->result = SZ_OK; | |
327 | p->streamEndWasReached = 0; | |
328 | ||
329 | if (readData) | |
330 | MatchFinder_ReadBlock(p); | |
331 | ||
332 | MatchFinder_SetLimits(p); | |
333 | } | |
334 | ||
335 | ||
/* Full initialisation: clears both parts of the hash table, then resets the
   position state and reads the first block of input. */
336 | void MatchFinder_Init(CMatchFinder *p) | |
337 | { | |
338 | MatchFinder_Init_HighHash(p); | |
339 | MatchFinder_Init_LowHash(p); | |
340 | MatchFinder_Init_3(p, True); | |
341 | } | |
342 | ||
343 | ||
/* Amount to subtract during normalisation: pos - historySize - 1 with the low
   bits cleared by kNormalizeMask, i.e. rounded down to a multiple of
   kNormalizeStepMin. */
344 | static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) | |
345 | { | |
346 | return (p->pos - p->historySize - 1) & kNormalizeMask; | |
347 | } | |
348 | ||
/* Rebases numItems stored positions in items[] by subValue: entries that are
   not greater than subValue become kEmptyHashValue, all others are reduced by
   subValue. */
349 | void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) | |
350 | { | |
351 | size_t i; | |
352 | for (i = 0; i < numItems; i++) | |
353 | { | |
354 | UInt32 value = items[i]; | |
355 | if (value <= subValue) | |
356 | value = kEmptyHashValue; | |
357 | else | |
358 | value -= subValue; | |
359 | items[i] = value; | |
360 | } | |
361 | } | |
362 | ||
/* Rebases every stored reference (all p->numRefs entries starting at p->hash)
   and the position counters by the same sub-value. */
363 | static void MatchFinder_Normalize(CMatchFinder *p) | |
364 | { | |
365 | UInt32 subValue = MatchFinder_GetSubValue(p); | |
366 | MatchFinder_Normalize3(subValue, p->hash, p->numRefs); | |
367 | MatchFinder_ReduceOffsets(p, subValue); | |
368 | } | |
369 | ||
370 | ||
/* Housekeeping, in this order: normalise when pos has reached
   kMaxValForNormalize; compact the window and read more input when exactly
   keepSizeAfter bytes remain ahead and the stream has not ended; wrap
   cyclicBufferPos back to 0 at the end of the cyclic buffer; finally
   recompute the limits. */
371 | MY_NO_INLINE | |
372 | static void MatchFinder_CheckLimits(CMatchFinder *p) | |
373 | { | |
374 | if (p->pos == kMaxValForNormalize) | |
375 | MatchFinder_Normalize(p); | |
376 | if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) | |
377 | MatchFinder_CheckAndMoveAndRead(p); | |
378 | if (p->cyclicBufferPos == p->cyclicBufferSize) | |
379 | p->cyclicBufferPos = 0; | |
380 | MatchFinder_SetLimits(p); | |
381 | } | |
382 | ||
383 | ||
384 | /* | |
385 | (lenLimit > maxLen) | |
386 | */ | |
/* Chain-walking match search.
   Stores curMatch in son[_cyclicBufferPos], then follows the links in son[]
   from candidate to candidate. The walk ends when a candidate lies
   _cyclicBufferSize or more positions back, or when the cutValue countdown
   runs out. Every candidate that matches for more than maxLen bytes is
   recorded as a (length, delta - 1) pair in distances[]; a candidate that
   matches all lenLimit bytes is recorded and ends the search immediately.
   Returns a pointer just past the last pair written.
   The first, commented-out body is an earlier index-based variant kept by
   upstream; the live code below it uses pointer comparisons instead. */
387 | MY_FORCE_INLINE | |
388 | static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, | |
389 | UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, | |
390 | UInt32 *distances, unsigned maxLen) | |
391 | { | |
392 | /* | |
393 | son[_cyclicBufferPos] = curMatch; | |
394 | for (;;) | |
395 | { | |
396 | UInt32 delta = pos - curMatch; | |
397 | if (cutValue-- == 0 || delta >= _cyclicBufferSize) | |
398 | return distances; | |
399 | { | |
400 | const Byte *pb = cur - delta; | |
401 | curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; | |
402 | if (pb[maxLen] == cur[maxLen] && *pb == *cur) | |
403 | { | |
404 | UInt32 len = 0; | |
405 | while (++len != lenLimit) | |
406 | if (pb[len] != cur[len]) | |
407 | break; | |
408 | if (maxLen < len) | |
409 | { | |
410 | maxLen = len; | |
411 | *distances++ = len; | |
412 | *distances++ = delta - 1; | |
413 | if (len == lenLimit) | |
414 | return distances; | |
415 | } | |
416 | } | |
417 | } | |
418 | } | |
419 | */ | |
420 | ||
421 | const Byte *lim = cur + lenLimit; | |
422 | son[_cyclicBufferPos] = curMatch; | |
423 | do | |
424 | { | |
425 | UInt32 delta = pos - curMatch; | |
426 | if (delta >= _cyclicBufferSize) | |
427 | break; | |
428 | { | |
429 | ptrdiff_t diff; | |
/* Fetch the next link first; the index wraps around the cyclic buffer when
   delta reaches back past slot 0. */
430 | curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; | |
431 | diff = (ptrdiff_t)0 - delta; | |
/* Cheap pre-check: the candidate can only beat maxLen if the byte at offset
   maxLen agrees. */
432 | if (cur[maxLen] == cur[maxLen + diff]) | |
433 | { | |
434 | const Byte *c = cur; | |
435 | while (*c == c[diff]) | |
436 | { | |
437 | if (++c == lim) | |
438 | { | |
439 | distances[0] = (UInt32)(lim - cur); | |
440 | distances[1] = delta - 1; | |
441 | return distances + 2; | |
442 | } | |
443 | } | |
444 | { | |
445 | unsigned len = (unsigned)(c - cur); | |
446 | if (maxLen < len) | |
447 | { | |
448 | maxLen = len; | |
449 | distances[0] = (UInt32)len; | |
450 | distances[1] = delta - 1; | |
451 | distances += 2; | |
452 | } | |
453 | } | |
454 | } | |
455 | } | |
456 | } | |
457 | while (--cutValue); | |
458 | ||
459 | return distances; | |
460 | } | |
461 | ||
462 | ||
/* Match search over the two-entries-per-slot layout of son[] (presumably the
   binary-tree mode selected by btMode -- confirm against LzFind.h).
   ptr0 and ptr1 start at the two entries of the slot for _cyclicBufferPos and
   are re-linked while the search descends. Each candidate is compared
   starting at min(len0, len1), the prefix length already known to match on
   both sides. A candidate longer than maxLen is appended to distances[] as a
   (length, delta - 1) pair.
   The search ends when cutValue is exhausted or the candidate is
   _cyclicBufferSize or more positions back (both pending links are then set
   to kEmptyHashValue), or when a candidate matches all lenLimit bytes (the
   candidate's own two links are copied into the pending slots).
   Returns a pointer just past the last pair written. */
463 | MY_FORCE_INLINE | |
464 | UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, | |
465 | UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, | |
466 | UInt32 *distances, UInt32 maxLen) | |
467 | { | |
468 | CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; | |
469 | CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); | |
470 | unsigned len0 = 0, len1 = 0; | |
471 | for (;;) | |
472 | { | |
473 | UInt32 delta = pos - curMatch; | |
474 | if (cutValue-- == 0 || delta >= _cyclicBufferSize) | |
475 | { | |
476 | *ptr0 = *ptr1 = kEmptyHashValue; | |
477 | return distances; | |
478 | } | |
479 | { | |
480 | CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); | |
481 | const Byte *pb = cur - delta; | |
482 | unsigned len = (len0 < len1 ? len0 : len1); | |
483 | UInt32 pair0 = pair[0]; | |
484 | if (pb[len] == cur[len]) | |
485 | { | |
486 | if (++len != lenLimit && pb[len] == cur[len]) | |
487 | while (++len != lenLimit) | |
488 | if (pb[len] != cur[len]) | |
489 | break; | |
490 | if (maxLen < len) | |
491 | { | |
492 | maxLen = (UInt32)len; | |
493 | *distances++ = (UInt32)len; | |
494 | *distances++ = delta - 1; | |
495 | if (len == lenLimit) | |
496 | { | |
497 | *ptr1 = pair0; | |
498 | *ptr0 = pair[1]; | |
499 | return distances; | |
500 | } | |
501 | } | |
502 | } | |
/* Descend: the first differing byte decides which pending link receives the
   candidate and which of the candidate's two entries is followed next. */
503 | if (pb[len] < cur[len]) | |
504 | { | |
505 | *ptr1 = curMatch; | |
506 | ptr1 = pair + 1; | |
507 | curMatch = *ptr1; | |
508 | len1 = len; | |
509 | } | |
510 | else | |
511 | { | |
512 | *ptr0 = curMatch; | |
513 | ptr0 = pair; | |
514 | curMatch = *ptr0; | |
515 | len0 = len; | |
516 | } | |
517 | } | |
518 | } | |
519 | } | |
520 | ||
/*
  SkipMatchesSpec performs the same walk and the same re-linking of the `son`
  pairs as GetMatchesSpec1, but it takes no distances / maxLen arguments and
  reports nothing.

  lenLimit          - upper bound for the compared length
  curMatch          - position of the first candidate
  pos               - current position; a candidate's distance is (pos - curMatch)
  cur               - bytes at the current position
  son               - array with two CLzRef links per cyclic-buffer slot
  _cyclicBufferPos  - slot of the current position
  _cyclicBufferSize - number of slots; a candidate with delta >= this value ends the walk
  cutValue          - maximum number of candidates that are visited
*/
521 | static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, | |
522 | UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) | |
523 | { | |
/* The two links of the current position's pair: the slots still waiting for a value. */
524 | CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; | |
525 | CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); | |
526 | unsigned len0 = 0, len1 = 0; | |
527 | for (;;) | |
528 | { | |
529 | UInt32 delta = pos - curMatch; | |
/* End of walk: close both pending slots with kEmptyHashValue. */
530 | if (cutValue-- == 0 || delta >= _cyclicBufferSize) | |
531 | { | |
532 | *ptr0 = *ptr1 = kEmptyHashValue; | |
533 | return; | |
534 | } | |
535 | { | |
/* Pair of the candidate, with wrap-around inside the cyclic buffer. */
536 | CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); | |
537 | const Byte *pb = cur - delta; | |
538 | unsigned len = (len0 < len1 ? len0 : len1); | |
539 | if (pb[len] == cur[len]) | |
540 | { | |
541 | while (++len != lenLimit) | |
542 | if (pb[len] != cur[len]) | |
543 | break; | |
544 | { | |
/* Candidate equals the current data for lenLimit bytes: take over its two links and stop. */
545 | if (len == lenLimit) | |
546 | { | |
547 | *ptr1 = pair[0]; | |
548 | *ptr0 = pair[1]; | |
549 | return; | |
550 | } | |
551 | } | |
552 | } | |
/* The first differing byte decides which pending slot receives the candidate and which link is followed next. */
553 | if (pb[len] < cur[len]) | |
554 | { | |
555 | *ptr1 = curMatch; | |
556 | ptr1 = pair + 1; | |
557 | curMatch = *ptr1; | |
558 | len1 = len; | |
559 | } | |
560 | else | |
561 | { | |
562 | *ptr0 = curMatch; | |
563 | ptr0 = pair; | |
564 | curMatch = *ptr0; | |
565 | len0 = len; | |
566 | } | |
567 | } | |
568 | } | |
569 | } | |
570 | ||
/*
  MOVE_POS advances the match finder `p` by one byte: cyclicBufferPos, buffer
  and pos are incremented, and MatchFinder_CheckLimits(p) is called when pos
  reaches posLimit. It expands to several statements that end in an `if`, so
  it has to be used where a statement sequence is valid.
*/
571 | #define MOVE_POS \ | |
572 | ++p->cyclicBufferPos; \ | |
573 | p->buffer++; \ | |
574 | if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); | |
575 | ||
/* MOVE_POS followed by `return (UInt32)offset;` - the enclosing function must have a local named `offset`. */
576 | #define MOVE_POS_RET MOVE_POS return (UInt32)offset; | |
577 | ||
/* Function form of MOVE_POS. */
578 | static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } | |
579 | ||
/*
  Common prologue of the GetMatches / Skip functions. Declares the locals
  lenLimit, hv, cur and curMatch. When p->lenLimit is below minLen the position
  is advanced by one byte and ret_op is executed; otherwise cur is set to
  p->buffer.
*/
580 | #define GET_MATCHES_HEADER2(minLen, ret_op) \ | |
581 | unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ | |
582 | lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ | |
583 | cur = p->buffer; | |
584 | ||
/* GetMatches flavour returns 0 in the short case; Skip flavour issues `continue`, so it must sit inside a loop. */
585 | #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) | |
586 | #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) | |
587 | ||
/* The six arguments (pos ... cutValue) that follow lenLimit and curMatch in the *Spec functions. */
588 | #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue | |
589 | ||
/* Runs GetMatchesSpec1 writing at distances + offset, stores the new number of used entries in offset, advances one byte and returns offset. */
590 | #define GET_MATCHES_FOOTER(offset, maxLen) \ | |
591 | offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ | |
592 | distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET; | |
593 | ||
/* Runs SkipMatchesSpec for the current position and advances one byte. */
594 | #define SKIP_FOOTER \ | |
595 | SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; | |
596 | ||
/*
  Extends maxLen: starting at offset maxLen, compares the current data with the
  data d2 bytes earlier until the first difference or lenLimit. Uses the locals
  d2, cur, maxLen and lenLimit of the enclosing function.
*/
597 | #define UPDATE_maxLen { \ | |
598 | ptrdiff_t diff = (ptrdiff_t)0 - d2; \ | |
599 | const Byte *c = cur + maxLen; \ | |
600 | const Byte *lim = cur + lenLimit; \ | |
601 | for (; c != lim; c++) if (*(c + diff) != *c) break; \ | |
602 | maxLen = (unsigned)(c - cur); } | |
603 | ||
/*
  GetMatches for the configuration selected by btMode with numHashBytes == 2
  (see MatchFinder_CreateVTable).

  p         - match finder state; it is advanced by one byte
  distances - output array filled with (length, distance - 1) pairs
  Returns the number of UInt32 values written to distances; 0 when p->lenLimit
  is below 2.
*/
604 | static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | |
605 | { | |
606 | unsigned offset; | |
607 | GET_MATCHES_HEADER(2) | |
/* hv is built directly from the first two bytes at cur. */
608 | HASH2_CALC; | |
/* Read the previous position stored for this hash value, then store the current position. */
609 | curMatch = p->hash[hv]; | |
610 | p->hash[hv] = p->pos; | |
611 | offset = 0; | |
/* Only matches longer than 1 byte are reported; the macro also advances and returns. */
612 | GET_MATCHES_FOOTER(offset, 1) | |
613 | } | |
614 | ||
/*
  GetMatches variant that uses HASH_ZIP_CALC (a 16-bit hash over the first
  three bytes) together with GetMatchesSpec1.

  p         - match finder state; it is advanced by one byte
  distances - output array filled with (length, distance - 1) pairs
  Returns the number of UInt32 values written to distances; 0 when p->lenLimit
  is below 3.
*/
615 | UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | |
616 | { | |
617 | unsigned offset; | |
618 | GET_MATCHES_HEADER(3) | |
619 | HASH_ZIP_CALC; | |
/* Read the previous position stored for this hash value, then store the current position. */
620 | curMatch = p->hash[hv]; | |
621 | p->hash[hv] = p->pos; | |
622 | offset = 0; | |
/* Only matches longer than 2 bytes are reported; the macro also advances and returns. */
623 | GET_MATCHES_FOOTER(offset, 2) | |
624 | } | |
625 | ||
/*
  GetMatches for the configuration selected by btMode with numHashBytes == 3
  (see MatchFinder_CreateVTable). A 2-byte hash table at the start of p->hash
  supplies one direct candidate; the main table at p->hash + kFix3HashSize
  supplies the start of the walk done by GetMatchesSpec1.

  p         - match finder state; it is advanced by one byte
  distances - output array filled with (length, distance - 1) pairs
  Returns the number of UInt32 values written to distances; 0 when p->lenLimit
  is below 3.
*/
626 | static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | |
627 | { | |
628 | UInt32 h2, d2, pos; | |
629 | unsigned maxLen, offset; | |
630 | UInt32 *hash; | |
631 | GET_MATCHES_HEADER(3) | |
632 | ||
/* Sets h2 (2-byte hash) and hv (main hash). */
633 | HASH3_CALC; | |
634 | ||
635 | hash = p->hash; | |
636 | pos = p->pos; | |
637 | ||
/* Distance to the position last stored under h2. */
638 | d2 = pos - hash[h2]; | |
639 | ||
640 | curMatch = (hash + kFix3HashSize)[hv]; | |
641 | ||
/* Both tables now point at the current position. */
642 | hash[h2] = pos; | |
643 | (hash + kFix3HashSize)[hv] = pos; | |
644 | ||
645 | maxLen = 2; | |
646 | offset = 0; | |
647 | ||
/* The h2 candidate is used only when it is inside the cyclic buffer and its first byte equals the current byte. */
648 | if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) | |
649 | { | |
/* Extends maxLen from offset 2 onwards. */
650 | UPDATE_maxLen | |
651 | distances[0] = (UInt32)maxLen; | |
652 | distances[1] = d2 - 1; | |
653 | offset = 2; | |
/* Match already has the maximum length: update the `son` links without searching further, advance and return. */
654 | if (maxLen == lenLimit) | |
655 | { | |
656 | SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); | |
657 | MOVE_POS_RET; | |
658 | } | |
659 | } | |
660 | ||
661 | GET_MATCHES_FOOTER(offset, maxLen) | |
662 | } | |
663 | ||
/*
  GetMatches used by MatchFinder_CreateVTable when btMode is set and
  numHashBytes is neither 2 nor 3. p->hash holds three tables: the 2-byte hash
  at offset 0, the 3-byte hash at kFix3HashSize and the main hash at
  kFix4HashSize. The first two supply direct candidates; the main table
  supplies the start of the walk done by GetMatchesSpec1.

  p         - match finder state; it is advanced by one byte
  distances - output array filled with (length, distance - 1) pairs
  Returns the number of UInt32 values written to distances; 0 when p->lenLimit
  is below 4.
*/
664 | static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | |
665 | { | |
666 | UInt32 h2, h3, d2, d3, pos; | |
667 | unsigned maxLen, offset; | |
668 | UInt32 *hash; | |
669 | GET_MATCHES_HEADER(4) | |
670 | ||
/* Sets h2, h3 and hv. */
671 | HASH4_CALC; | |
672 | ||
673 | hash = p->hash; | |
674 | pos = p->pos; | |
675 | ||
/* Distances to the positions last stored under h2 and h3. */
676 | d2 = pos - hash [h2]; | |
677 | d3 = pos - (hash + kFix3HashSize)[h3]; | |
678 | ||
679 | curMatch = (hash + kFix4HashSize)[hv]; | |
680 | ||
/* All three tables now point at the current position. */
681 | hash [h2] = pos; | |
682 | (hash + kFix3HashSize)[h3] = pos; | |
683 | (hash + kFix4HashSize)[hv] = pos; | |
684 | ||
685 | maxLen = 0; | |
686 | offset = 0; | |
687 | ||
/* h2 candidate: recorded with length 2 when it is inside the cyclic buffer and its first byte matches. */
688 | if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) | |
689 | { | |
690 | maxLen = 2; | |
691 | distances[0] = 2; | |
692 | distances[1] = d2 - 1; | |
693 | offset = 2; | |
694 | } | |
695 | ||
/* h3 candidate at a different distance: only its distance is stored here; the length slot is filled after UPDATE_maxLen below. d2 is redirected to d3 so that UPDATE_maxLen measures this candidate. */
696 | if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) | |
697 | { | |
698 | maxLen = 3; | |
699 | distances[(size_t)offset + 1] = d3 - 1; | |
700 | offset += 2; | |
701 | d2 = d3; | |
702 | } | |
703 | ||
704 | if (offset != 0) | |
705 | { | |
/* Extend the most recently recorded candidate and write its final length. */
706 | UPDATE_maxLen | |
707 | distances[(size_t)offset - 2] = (UInt32)maxLen; | |
/* Maximum length reached: update the `son` links without searching further, advance and return. */
708 | if (maxLen == lenLimit) | |
709 | { | |
710 | SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); | |
711 | MOVE_POS_RET; | |
712 | } | |
713 | } | |
714 | ||
/* The walk below reports only matches longer than 3 bytes. */
715 | if (maxLen < 3) | |
716 | maxLen = 3; | |
717 | ||
718 | GET_MATCHES_FOOTER(offset, maxLen) | |
719 | } | |
720 | ||
721 | /* | |
722 | static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | |
723 | { | |
724 | UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; | |
725 | UInt32 *hash; | |
726 | GET_MATCHES_HEADER(5) | |
727 | ||
728 | HASH5_CALC; | |
729 | ||
730 | hash = p->hash; | |
731 | pos = p->pos; | |
732 | ||
733 | d2 = pos - hash [h2]; | |
734 | d3 = pos - (hash + kFix3HashSize)[h3]; | |
735 | d4 = pos - (hash + kFix4HashSize)[h4]; | |
736 | ||
737 | curMatch = (hash + kFix5HashSize)[hv]; | |
738 | ||
739 | hash [h2] = pos; | |
740 | (hash + kFix3HashSize)[h3] = pos; | |
741 | (hash + kFix4HashSize)[h4] = pos; | |
742 | (hash + kFix5HashSize)[hv] = pos; | |
743 | ||
744 | maxLen = 0; | |
745 | offset = 0; | |
746 | ||
747 | if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) | |
748 | { | |
749 | distances[0] = maxLen = 2; | |
750 | distances[1] = d2 - 1; | |
751 | offset = 2; | |
752 | if (*(cur - d2 + 2) == cur[2]) | |
753 | distances[0] = maxLen = 3; | |
754 | else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) | |
755 | { | |
756 | distances[2] = maxLen = 3; | |
757 | distances[3] = d3 - 1; | |
758 | offset = 4; | |
759 | d2 = d3; | |
760 | } | |
761 | } | |
762 | else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) | |
763 | { | |
764 | distances[0] = maxLen = 3; | |
765 | distances[1] = d3 - 1; | |
766 | offset = 2; | |
767 | d2 = d3; | |
768 | } | |
769 | ||
770 | if (d2 != d4 && d4 < p->cyclicBufferSize | |
771 | && *(cur - d4) == *cur | |
772 | && *(cur - d4 + 3) == *(cur + 3)) | |
773 | { | |
774 | maxLen = 4; | |
775 | distances[(size_t)offset + 1] = d4 - 1; | |
776 | offset += 2; | |
777 | d2 = d4; | |
778 | } | |
779 | ||
780 | if (offset != 0) | |
781 | { | |
782 | UPDATE_maxLen | |
783 | distances[(size_t)offset - 2] = maxLen; | |
784 | if (maxLen == lenLimit) | |
785 | { | |
786 | SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); | |
787 | MOVE_POS_RET; | |
788 | } | |
789 | } | |
790 | ||
791 | if (maxLen < 4) | |
792 | maxLen = 4; | |
793 | ||
794 | GET_MATCHES_FOOTER(offset, maxLen) | |
795 | } | |
796 | */ | |
797 | ||
/*
  GetMatches used by MatchFinder_CreateVTable when btMode is 0. The hash-table
  handling is the same as in Bt4_MatchFinder_GetMatches; the difference is that
  p->son holds a single link per position and the search is done by
  Hc_GetMatchesSpec.

  p         - match finder state; it is advanced by one byte
  distances - output array filled with (length, distance - 1) pairs
  Returns the number of UInt32 values written to distances; 0 when p->lenLimit
  is below 4.
*/
798 | static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | |
799 | { | |
800 | UInt32 h2, h3, d2, d3, pos; | |
801 | unsigned maxLen, offset; | |
802 | UInt32 *hash; | |
803 | GET_MATCHES_HEADER(4) | |
804 | ||
/* Sets h2, h3 and hv. */
805 | HASH4_CALC; | |
806 | ||
807 | hash = p->hash; | |
808 | pos = p->pos; | |
809 | ||
/* Distances to the positions last stored under h2 and h3. */
810 | d2 = pos - hash [h2]; | |
811 | d3 = pos - (hash + kFix3HashSize)[h3]; | |
812 | curMatch = (hash + kFix4HashSize)[hv]; | |
813 | ||
/* All three tables now point at the current position. */
814 | hash [h2] = pos; | |
815 | (hash + kFix3HashSize)[h3] = pos; | |
816 | (hash + kFix4HashSize)[hv] = pos; | |
817 | ||
818 | maxLen = 0; | |
819 | offset = 0; | |
820 | ||
/* h2 candidate: recorded with length 2 when it is inside the cyclic buffer and its first byte matches. */
821 | if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) | |
822 | { | |
823 | maxLen = 2; | |
824 | distances[0] = 2; | |
825 | distances[1] = d2 - 1; | |
826 | offset = 2; | |
827 | } | |
828 | ||
/* h3 candidate at a different distance: the length slot is filled after UPDATE_maxLen below; d2 is redirected to d3 for that macro. */
829 | if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) | |
830 | { | |
831 | maxLen = 3; | |
832 | distances[(size_t)offset + 1] = d3 - 1; | |
833 | offset += 2; | |
834 | d2 = d3; | |
835 | } | |
836 | ||
837 | if (offset != 0) | |
838 | { | |
/* Extend the most recently recorded candidate and write its final length. */
839 | UPDATE_maxLen | |
840 | distances[(size_t)offset - 2] = (UInt32)maxLen; | |
/* Maximum length reached: store the link for the current position, advance and return without searching. */
841 | if (maxLen == lenLimit) | |
842 | { | |
843 | p->son[p->cyclicBufferPos] = curMatch; | |
844 | MOVE_POS_RET; | |
845 | } | |
846 | } | |
847 | ||
/* The search below reports only matches longer than 3 bytes. */
848 | if (maxLen < 3) | |
849 | maxLen = 3; | |
850 | ||
851 | offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), | |
852 | distances + offset, maxLen) - (distances)); | |
853 | MOVE_POS_RET | |
854 | } | |
855 | ||
856 | /* | |
857 | static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | |
858 | { | |
859 | UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; | |
860 | UInt32 *hash; | |
861 | GET_MATCHES_HEADER(5) | |
862 | ||
863 | HASH5_CALC; | |
864 | ||
865 | hash = p->hash; | |
866 | pos = p->pos; | |
867 | ||
868 | d2 = pos - hash [h2]; | |
869 | d3 = pos - (hash + kFix3HashSize)[h3]; | |
870 | d4 = pos - (hash + kFix4HashSize)[h4]; | |
871 | ||
872 | curMatch = (hash + kFix5HashSize)[hv]; | |
873 | ||
874 | hash [h2] = pos; | |
875 | (hash + kFix3HashSize)[h3] = pos; | |
876 | (hash + kFix4HashSize)[h4] = pos; | |
877 | (hash + kFix5HashSize)[hv] = pos; | |
878 | ||
879 | maxLen = 0; | |
880 | offset = 0; | |
881 | ||
882 | if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) | |
883 | { | |
884 | distances[0] = maxLen = 2; | |
885 | distances[1] = d2 - 1; | |
886 | offset = 2; | |
887 | if (*(cur - d2 + 2) == cur[2]) | |
888 | distances[0] = maxLen = 3; | |
889 | else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) | |
890 | { | |
891 | distances[2] = maxLen = 3; | |
892 | distances[3] = d3 - 1; | |
893 | offset = 4; | |
894 | d2 = d3; | |
895 | } | |
896 | } | |
897 | else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) | |
898 | { | |
899 | distances[0] = maxLen = 3; | |
900 | distances[1] = d3 - 1; | |
901 | offset = 2; | |
902 | d2 = d3; | |
903 | } | |
904 | ||
905 | if (d2 != d4 && d4 < p->cyclicBufferSize | |
906 | && *(cur - d4) == *cur | |
907 | && *(cur - d4 + 3) == *(cur + 3)) | |
908 | { | |
909 | maxLen = 4; | |
910 | distances[(size_t)offset + 1] = d4 - 1; | |
911 | offset += 2; | |
912 | d2 = d4; | |
913 | } | |
914 | ||
915 | if (offset != 0) | |
916 | { | |
917 | UPDATE_maxLen | |
918 | distances[(size_t)offset - 2] = maxLen; | |
919 | if (maxLen == lenLimit) | |
920 | { | |
921 | p->son[p->cyclicBufferPos] = curMatch; | |
922 | MOVE_POS_RET; | |
923 | } | |
924 | } | |
925 | ||
926 | if (maxLen < 4) | |
927 | maxLen = 4; | |
928 | ||
929 | offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), | |
930 | distances + offset, maxLen) - (distances)); | |
931 | MOVE_POS_RET | |
932 | } | |
933 | */ | |
934 | ||
/*
  GetMatches variant that uses HASH_ZIP_CALC (a 16-bit hash over the first
  three bytes) together with Hc_GetMatchesSpec.

  p         - match finder state; it is advanced by one byte
  distances - output array filled with (length, distance - 1) pairs
  Returns the number of UInt32 values written to distances; 0 when p->lenLimit
  is below 3.
*/
935 | UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | |
936 | { | |
937 | unsigned offset; | |
938 | GET_MATCHES_HEADER(3) | |
939 | HASH_ZIP_CALC; | |
/* Read the previous position stored for this hash value, then store the current position. */
940 | curMatch = p->hash[hv]; | |
941 | p->hash[hv] = p->pos; | |
/* Only matches longer than 2 bytes are reported. */
942 | offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), | |
943 | distances, 2) - (distances)); | |
944 | MOVE_POS_RET | |
945 | } | |
946 | ||
/*
  Skip counterpart of Bt2_MatchFinder_GetMatches: advances the match finder,
  updating p->hash and the `son` links for every position, without reporting
  matches.

  p   - match finder state
  num - number of positions to skip. The loop is a do/while whose counter is
        decremented and tested only after the body has run, so callers must
        pass num >= 1.
*/
947 | static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | |
948 | { | |
949 | do | |
950 | { | |
/* When p->lenLimit is below 2 the macro advances one byte and `continue`s to the loop condition. */
951 | SKIP_HEADER(2) | |
952 | HASH2_CALC; | |
953 | curMatch = p->hash[hv]; | |
954 | p->hash[hv] = p->pos; | |
955 | SKIP_FOOTER | |
956 | } | |
957 | while (--num != 0); | |
958 | } | |
959 | ||
/*
  Skip counterpart of Bt3Zip_MatchFinder_GetMatches: advances the match finder,
  updating p->hash and the `son` links for every position, without reporting
  matches.

  p   - match finder state
  num - number of positions to skip. The loop is a do/while whose counter is
        decremented and tested only after the body has run, so callers must
        pass num >= 1.
*/
960 | void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | |
961 | { | |
962 | do | |
963 | { | |
/* When p->lenLimit is below 3 the macro advances one byte and `continue`s to the loop condition. */
964 | SKIP_HEADER(3) | |
965 | HASH_ZIP_CALC; | |
966 | curMatch = p->hash[hv]; | |
967 | p->hash[hv] = p->pos; | |
968 | SKIP_FOOTER | |
969 | } | |
970 | while (--num != 0); | |
971 | } | |
972 | ||
/*
  Skip counterpart of Bt3_MatchFinder_GetMatches: advances the match finder,
  updating both hash tables and the `son` links for every position, without
  reporting matches.

  p   - match finder state
  num - number of positions to skip. The loop is a do/while whose counter is
        decremented and tested only after the body has run, so callers must
        pass num >= 1.
*/
973 | static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | |
974 | { | |
975 | do | |
976 | { | |
977 | UInt32 h2; | |
978 | UInt32 *hash; | |
/* When p->lenLimit is below 3 the macro advances one byte and `continue`s to the loop condition. */
979 | SKIP_HEADER(3) | |
980 | HASH3_CALC; | |
981 | hash = p->hash; | |
/* The old main-table entry is read before both tables are overwritten with the current position. */
982 | curMatch = (hash + kFix3HashSize)[hv]; | |
983 | hash[h2] = | |
984 | (hash + kFix3HashSize)[hv] = p->pos; | |
985 | SKIP_FOOTER | |
986 | } | |
987 | while (--num != 0); | |
988 | } | |
989 | ||
/*
  Skip counterpart of Bt4_MatchFinder_GetMatches: advances the match finder,
  updating the three hash tables and the `son` links for every position,
  without reporting matches.

  p   - match finder state
  num - number of positions to skip. The loop is a do/while whose counter is
        decremented and tested only after the body has run, so callers must
        pass num >= 1.
*/
990 | static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | |
991 | { | |
992 | do | |
993 | { | |
994 | UInt32 h2, h3; | |
995 | UInt32 *hash; | |
/* When p->lenLimit is below 4 the macro advances one byte and `continue`s to the loop condition. */
996 | SKIP_HEADER(4) | |
997 | HASH4_CALC; | |
998 | hash = p->hash; | |
/* The old main-table entry is read before the three tables are overwritten with the current position. */
999 | curMatch = (hash + kFix4HashSize)[hv]; | |
1000 | hash [h2] = | |
1001 | (hash + kFix3HashSize)[h3] = | |
1002 | (hash + kFix4HashSize)[hv] = p->pos; | |
1003 | SKIP_FOOTER | |
1004 | } | |
1005 | while (--num != 0); | |
1006 | } | |
1007 | ||
1008 | /* | |
1009 | static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | |
1010 | { | |
1011 | do | |
1012 | { | |
1013 | UInt32 h2, h3, h4; | |
1014 | UInt32 *hash; | |
1015 | SKIP_HEADER(5) | |
1016 | HASH5_CALC; | |
1017 | hash = p->hash; | |
1018 | curMatch = (hash + kFix5HashSize)[hv]; | |
1019 | hash [h2] = | |
1020 | (hash + kFix3HashSize)[h3] = | |
1021 | (hash + kFix4HashSize)[h4] = | |
1022 | (hash + kFix5HashSize)[hv] = p->pos; | |
1023 | SKIP_FOOTER | |
1024 | } | |
1025 | while (--num != 0); | |
1026 | } | |
1027 | */ | |
1028 | ||
/*
  Skip counterpart of Hc4_MatchFinder_GetMatches: advances the match finder,
  updating the three hash tables and storing one link per position in p->son,
  without reporting matches.

  p   - match finder state
  num - number of positions to skip. The loop is a do/while whose counter is
        decremented and tested only after the body has run, so callers must
        pass num >= 1.
*/
1029 | static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | |
1030 | { | |
1031 | do | |
1032 | { | |
1033 | UInt32 h2, h3; | |
1034 | UInt32 *hash; | |
/* When p->lenLimit is below 4 the macro advances one byte and `continue`s to the loop condition. */
1035 | SKIP_HEADER(4) | |
1036 | HASH4_CALC; | |
1037 | hash = p->hash; | |
/* The old main-table entry is read before the three tables are overwritten with the current position. */
1038 | curMatch = (hash + kFix4HashSize)[hv]; | |
1039 | hash [h2] = | |
1040 | (hash + kFix3HashSize)[h3] = | |
1041 | (hash + kFix4HashSize)[hv] = p->pos; | |
/* Link the current position to the previous position with the same main hash. */
1042 | p->son[p->cyclicBufferPos] = curMatch; | |
1043 | MOVE_POS | |
1044 | } | |
1045 | while (--num != 0); | |
1046 | } | |
1047 | ||
1048 | /* | |
1049 | static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | |
1050 | { | |
1051 | do | |
1052 | { | |
1053 | UInt32 h2, h3, h4; | |
1054 | UInt32 *hash; | |
1055 | SKIP_HEADER(5) | |
1056 | HASH5_CALC; | |
1057 | hash = p->hash; | |
1058 | curMatch = (hash + kFix5HashSize)[hv]; | |
1059 | hash [h2] = | |
1060 | (hash + kFix3HashSize)[h3] = | |
1061 | (hash + kFix4HashSize)[h4] = | |
1062 | (hash + kFix5HashSize)[hv] = p->pos; | |
1063 | p->son[p->cyclicBufferPos] = curMatch; | |
1064 | MOVE_POS | |
1065 | } | |
1066 | while (--num != 0); | |
1067 | } | |
1068 | */ | |
1069 | ||
/*
  Skip counterpart of Hc3Zip_MatchFinder_GetMatches: advances the match finder,
  updating p->hash and storing one link per position in p->son, without
  reporting matches.

  p   - match finder state
  num - number of positions to skip. The loop is a do/while whose counter is
        decremented and tested only after the body has run, so callers must
        pass num >= 1.
*/
1070 | void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | |
1071 | { | |
1072 | do | |
1073 | { | |
/* When p->lenLimit is below 3 the macro advances one byte and `continue`s to the loop condition. */
1074 | SKIP_HEADER(3) | |
1075 | HASH_ZIP_CALC; | |
1076 | curMatch = p->hash[hv]; | |
1077 | p->hash[hv] = p->pos; | |
/* Link the current position to the previous position with the same hash. */
1078 | p->son[p->cyclicBufferPos] = curMatch; | |
1079 | MOVE_POS | |
1080 | } | |
1081 | while (--num != 0); | |
1082 | } | |
1083 | ||
/*
  Fills vTable with the functions that fit the configuration stored in p.
  Init, GetNumAvailableBytes and GetPointerToCurrentPos are always the same;
  GetMatches and Skip are chosen as follows:

    p->btMode == 0                       -> Hc4_*
    p->btMode != 0, numHashBytes == 2    -> Bt2_*
    p->btMode != 0, numHashBytes == 3    -> Bt3_*
    p->btMode != 0, any other value      -> Bt4_*

  The selected functions take a CMatchFinder * and are stored through casts to
  the Mf_*_Func pointer types.

  p      - match finder whose btMode / numHashBytes are inspected (not modified)
  vTable - table to fill; all five members are written
*/
1084 | void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) | |
1085 | { | |
1086 | vTable->Init = (Mf_Init_Func)MatchFinder_Init; | |
1087 | vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; | |
1088 | vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; | |
1089 | if (!p->btMode) | |
1090 | { | |
1091 | /* if (p->numHashBytes <= 4) */ | |
1092 | { | |
1093 | vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; | |
1094 | vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; | |
1095 | } | |
1096 | /* | |
1097 | else | |
1098 | { | |
1099 | vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; | |
1100 | vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; | |
1101 | } | |
1102 | */ | |
1103 | } | |
1104 | else if (p->numHashBytes == 2) | |
1105 | { | |
1106 | vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; | |
1107 | vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; | |
1108 | } | |
1109 | else if (p->numHashBytes == 3) | |
1110 | { | |
1111 | vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; | |
1112 | vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; | |
1113 | } | |
1114 | else /* if (p->numHashBytes == 4) */ | |
1115 | { | |
1116 | vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; | |
1117 | vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; | |
1118 | } | |
1119 | /* | |
1120 | else | |
1121 | { | |
1122 | vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; | |
1123 | vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; | |
1124 | } | |
1125 | */ | |
1126 | } |
0 | /* LzFind.h -- Match finder for LZ algorithms | |
1 | 2017-06-10 : Igor Pavlov : Public domain */ | |
2 | ||
3 | #ifndef __LZ_FIND_H | |
4 | #define __LZ_FIND_H | |
5 | ||
6 | #include "7zTypes.h" | |
7 | ||
8 | EXTERN_C_BEGIN | |
9 | ||
10 | typedef UInt32 CLzRef; | |
11 | ||
/*
  State of the match finder. The notes on individual members describe only how
  the match-finder code in LzFind.c and the macros in this header use them;
  members without a note are not described here.
*/
12 | typedef struct _CMatchFinder | |
13 | { | |
/* Pointer to the byte at the current position; advanced by one on every position step. */
14 | Byte *buffer; | |
/* Current position counter; when it reaches posLimit the position step calls MatchFinder_CheckLimits. */
15 | UInt32 pos; | |
16 | UInt32 posLimit; | |
/* (streamPos - pos) is what Inline_MatchFinder_GetNumAvailableBytes reports. */
17 | UInt32 streamPos; | |
/* Match-length limit for the current position; GetMatches/Skip only move on when it is below their minimum length. */
18 | UInt32 lenLimit; | |
19 | ||
/* Slot of the current position inside `son`; advanced by one on every position step. */
20 | UInt32 cyclicBufferPos; | |
21 | UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ | |
22 | ||
/* streamEndWasReached, directInput and directInputRem are tested by Inline_MatchFinder_IsFinishedOK. */
23 | Byte streamEndWasReached; | |
/* Zero selects the Hc4 functions, non-zero the Bt* functions (MatchFinder_CreateVTable). */
24 | Byte btMode; | |
25 | Byte bigHash; | |
26 | Byte directInput; | |
27 | ||
28 | UInt32 matchMaxLen; | |
/* Hash tables mapping a hash value to the last position stored for it. */
29 | CLzRef *hash; | |
/* Links between positions: two entries per slot for the Bt* functions, one per slot for the Hc* functions. */
30 | CLzRef *son; | |
/* Mask applied to the main hash value in HASH3_CALC / HASH4_CALC / HASH5_CALC. */
31 | UInt32 hashMask; | |
/* Passed to the search routines as the maximum number of candidates to visit. */
32 | UInt32 cutValue; | |
33 | ||
34 | Byte *bufferBase; | |
35 | ISeqInStream *stream; | |
36 | ||
37 | UInt32 blockSize; | |
38 | UInt32 keepSizeBefore; | |
39 | UInt32 keepSizeAfter; | |
40 | ||
/* With btMode set: 2 selects Bt2, 3 selects Bt3, any other value Bt4 (MatchFinder_CreateVTable). */
41 | UInt32 numHashBytes; | |
42 | size_t directInputRem; | |
43 | UInt32 historySize; | |
44 | UInt32 fixedHashSize; | |
45 | UInt32 hashSizeSum; | |
46 | SRes result; | |
/* Byte-indexed table used by the HASH*_CALC macros. */
47 | UInt32 crc[256]; | |
48 | size_t numRefs; | |
49 | ||
50 | UInt64 expectedDataSize; | |
51 | } CMatchFinder; | |
52 | ||
/* Macro accessors on a CMatchFinder *p; each evaluates p more than once, so pass an expression without side effects. */

/* Pointer to the byte at the current position. */
53 | #define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) | |
54 | ||
/* Difference between streamPos and pos. */
55 | #define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) | |
56 | ||
/* Non-zero when the stream end was reached, pos has caught up with streamPos and, for direct input, no input remains. */
57 | #define Inline_MatchFinder_IsFinishedOK(p) \ | |
58 | ((p)->streamEndWasReached \ | |
59 | && (p)->streamPos == (p)->pos \ | |
60 | && (!(p)->directInput || (p)->directInputRem == 0)) | |
61 | ||
62 | int MatchFinder_NeedMove(CMatchFinder *p); | |
63 | Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); | |
64 | void MatchFinder_MoveBlock(CMatchFinder *p); | |
65 | void MatchFinder_ReadIfRequired(CMatchFinder *p); | |
66 | ||
67 | void MatchFinder_Construct(CMatchFinder *p); | |
68 | ||
69 | /* Conditions: | |
70 | historySize <= 3 GB | |
71 | keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB | |
72 | */ | |
73 | int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, | |
74 | UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, | |
75 | ISzAllocPtr alloc); | |
76 | void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); | |
77 | void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); | |
78 | void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); | |
79 | ||
80 | UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, | |
81 | UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, | |
82 | UInt32 *distances, UInt32 maxLen); | |
83 | ||
84 | /* | |
85 | Conditions: | |
86 | Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. | |
87 | Mf_GetPointerToCurrentPos_Func's result must be used only before any other function | |
88 | */ | |
89 | ||
/*
  Function-pointer types of the match-finder interface. Every function takes
  the match-finder object as an untyped pointer; GetMatches additionally takes
  the output array and returns a UInt32, Skip takes a UInt32 count.
*/
90 | typedef void (*Mf_Init_Func)(void *object); | |
91 | typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); | |
92 | typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); | |
93 | typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); | |
94 | typedef void (*Mf_Skip_Func)(void *object, UInt32); | |
95 | ||
/* Table of the five interface functions; MatchFinder_CreateVTable fills it for a CMatchFinder. */
96 | typedef struct _IMatchFinder | |
97 | { | |
98 | Mf_Init_Func Init; | |
99 | Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; | |
100 | Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; | |
101 | Mf_GetMatches_Func GetMatches; | |
102 | Mf_Skip_Func Skip; | |
103 | } IMatchFinder; | |
104 | ||
105 | void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); | |
106 | ||
107 | void MatchFinder_Init_LowHash(CMatchFinder *p); | |
108 | void MatchFinder_Init_HighHash(CMatchFinder *p); | |
109 | void MatchFinder_Init_3(CMatchFinder *p, int readData); | |
110 | void MatchFinder_Init(CMatchFinder *p); | |
111 | ||
112 | UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); | |
113 | UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); | |
114 | ||
115 | void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); | |
116 | void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); | |
117 | ||
118 | EXTERN_C_END | |
119 | ||
120 | #endif |
0 | /* LzHash.h -- HASH functions for LZ algorithms | |
1 | 2015-04-12 : Igor Pavlov : Public domain */ | |
2 | ||
3 | #ifndef __LZ_HASH_H | |
4 | #define __LZ_HASH_H | |
5 | ||
/* Number of entries of the fixed-size 2-, 3- and 4-byte hash tables. */
6 | #define kHash2Size (1 << 10) | |
7 | #define kHash3Size (1 << 16) | |
8 | #define kHash4Size (1 << 20) | |
9 | ||
/* Offsets inside one hash array: the 2-byte table is at 0, kFixNHashSize is the sum of the sizes of the tables placed before the N-byte table. */
10 | #define kFix3HashSize (kHash2Size) | |
11 | #define kFix4HashSize (kHash2Size + kHash3Size) | |
12 | #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) | |
13 | ||
/*
  The HASHn_CALC macros expect these names in the enclosing scope:
  `cur` (input bytes), `p` (object with crc[] and hashMask) and the result
  variables hv, h2, h3, h4 that the respective macro assigns.
*/

/* hv = first two bytes combined into a 16-bit value; no crc table involved. */
14 | #define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); | |
15 | ||
/* h2 from bytes 0..1, hv from bytes 0..2 masked with p->hashMask. */
16 | #define HASH3_CALC { \ | |
17 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ | |
18 | h2 = temp & (kHash2Size - 1); \ | |
19 | hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } | |
20 | ||
/* h2 from bytes 0..1, h3 from bytes 0..2, hv from bytes 0..3 masked with p->hashMask. */
21 | #define HASH4_CALC { \ | |
22 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ | |
23 | h2 = temp & (kHash2Size - 1); \ | |
24 | temp ^= ((UInt32)cur[2] << 8); \ | |
25 | h3 = temp & (kHash3Size - 1); \ | |
26 | hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; } | |
27 | ||
/* h2, h3, h4 as above, hv from bytes 0..4 masked with p->hashMask. */
28 | #define HASH5_CALC { \ | |
29 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ | |
30 | h2 = temp & (kHash2Size - 1); \ | |
31 | temp ^= ((UInt32)cur[2] << 8); \ | |
32 | h3 = temp & (kHash3Size - 1); \ | |
33 | temp ^= (p->crc[cur[3]] << 5); \ | |
34 | h4 = temp & (kHash4Size - 1); \ | |
35 | hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } | |
36 | ||
37 | /* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ | |
/* 16-bit hash over bytes 0..2, used by the *Zip match finders. */
38 | #define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; | |
39 | ||
40 | ||
/* MT_ variants: same mixing as above, but every result is masked with a fixed table size instead of p->hashMask. (NOTE(review): presumably for the multithreaded match finder - confirm against its source.) */
41 | #define MT_HASH2_CALC \ | |
42 | h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); | |
43 | ||
44 | #define MT_HASH3_CALC { \ | |
45 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ | |
46 | h2 = temp & (kHash2Size - 1); \ | |
47 | h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } | |
48 | ||
49 | #define MT_HASH4_CALC { \ | |
50 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ | |
51 | h2 = temp & (kHash2Size - 1); \ | |
52 | temp ^= ((UInt32)cur[2] << 8); \ | |
53 | h3 = temp & (kHash3Size - 1); \ | |
54 | h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } | |
55 | ||
56 | #endif |
0 | /* LzmaDec.c -- LZMA Decoder | |
1 | 2018-07-04 : Igor Pavlov : Public domain */ | |
2 | ||
3 | #include "Precomp.h" | |
4 | ||
5 | #include <string.h> | |
6 | #include <stdlib.h> | |
7 | #include <stdint.h> | |
8 | ||
9 | /* #include "CpuArch.h" */ | |
10 | #include "LzmaDec.h" | |
11 | ||
/*
  Range-decoder building blocks. The macros below operate on names that must
  exist in the enclosing scope: range, code (decoder state), buf (input
  pointer), bound and ttt (scratch), plus the names used by the literal macros
  (prob, probLit, symbol, matchByte, bit, offs).
*/

/* range is refilled as soon as it drops below 2^24. */
12 | #define kNumTopBits 24 | |
13 | #define kTopValue ((UInt32)1 << kNumTopBits) | |
14 | ||
/* Probabilities are 11-bit values (total 2048); each update moves a probability by 1/32 of its distance to the target. */
15 | #define kNumBitModelTotalBits 11 | |
16 | #define kBitModelTotal (1 << kNumBitModelTotalBits) | |
17 | #define kNumMoveBits 5 | |
18 | ||
19 | #define RC_INIT_SIZE 5 | |
20 | ||
/* Shifts one input byte into code and scales range by 256 when range < kTopValue. Reads *buf without a bounds check. */
21 | #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } | |
22 | ||
/* IF_BIT_0 loads the probability, normalizes, computes bound and opens an `if (code < bound)`; the bit is 0 when the condition holds. */
23 | #define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) | |
/* Bit was 0: keep the lower part of the range and raise the probability. */
24 | #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); | |
/* Bit was 1: keep the upper part of the range and lower the probability. */
25 | #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); | |
/* Decodes one bit with probability *p, appends it to i (i = 2*i or 2*i + 1) and runs A0 or A1. */
26 | #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ | |
27 | { UPDATE_0(p); i = (i + i); A0; } else \ | |
28 | { UPDATE_1(p); i = (i + i) + 1; A1; } | |
29 | ||
/* One step down a bit tree: the probability is selected by the current node index i. */
30 | #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } | |
31 | ||
/* REV_BIT decodes one bit with probability p[i] and runs A0 (bit 0) or A1 (bit 1); the three variants differ only in how i and m are updated. */
32 | #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ | |
33 | { UPDATE_0(p + i); A0; } else \ | |
34 | { UPDATE_1(p + i); A1; } | |
35 | #define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) | |
36 | #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) | |
37 | #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) | |
38 | ||
/* Decodes bits starting from node 1 until i >= limit, then subtracts limit so that i holds the decoded symbol. */
39 | #define TREE_DECODE(probs, limit, i) \ | |
40 | { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } | |
41 | ||
42 | /* #define _LZMA_SIZE_OPT */ | |
43 | ||
/* 6-bit tree decode: a loop when _LZMA_SIZE_OPT is defined, six unrolled steps otherwise. */
44 | #ifdef _LZMA_SIZE_OPT | |
45 | #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) | |
46 | #else | |
47 | #define TREE_6_DECODE(probs, i) \ | |
48 | { i = 1; \ | |
49 | TREE_GET_BIT(probs, i); \ | |
50 | TREE_GET_BIT(probs, i); \ | |
51 | TREE_GET_BIT(probs, i); \ | |
52 | TREE_GET_BIT(probs, i); \ | |
53 | TREE_GET_BIT(probs, i); \ | |
54 | TREE_GET_BIT(probs, i); \ | |
55 | i -= 0x40; } | |
56 | #endif | |
57 | ||
/* One literal bit without match context. */
58 | #define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) | |
/* One literal bit with match context: the probability index also depends on offs and on the bit taken from matchByte; offs is XOR-ed with bit when the decoded bit is 0. */
59 | #define MATCHED_LITER_DEC \ | |
60 | matchByte += matchByte; \ | |
61 | bit = offs; \ | |
62 | offs &= matchByte; \ | |
63 | probLit = prob + (offs + bit + symbol); \ | |
64 | GET_BIT2(probLit, symbol, offs ^= bit; , ;) | |
65 | ||
66 | ||
67 | ||
/*
  *_CHECK variants of the range-decoder macros. They differ from the plain
  versions in two ways: NORMALIZE_CHECK returns DUMMY_ERROR from the enclosing
  function when buf has reached bufLimit instead of reading past it, and the
  UPDATE_*_CHECK macros change only range / code and leave the probability
  untouched.
*/
68 | #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } | |
69 | ||
70 | #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) | |
71 | #define UPDATE_0_CHECK range = bound; | |
72 | #define UPDATE_1_CHECK range -= bound; code -= bound; | |
/* Decodes one bit, appends it to i and runs A0 (bit 0) or A1 (bit 1). */
73 | #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ | |
74 | { UPDATE_0_CHECK; i = (i + i); A0; } else \ | |
75 | { UPDATE_1_CHECK; i = (i + i) + 1; A1; } | |
76 | #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) | |
/* Bit-tree decode from node 1 until i >= limit; i then holds the symbol. */
77 | #define TREE_DECODE_CHECK(probs, limit, i) \ | |
78 | { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } | |
79 | ||
80 | ||
/* Same index/mask update as REV_BIT_VAR, without the probability update. */
81 | #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ | |
82 | { UPDATE_0_CHECK; i += m; m += m; } else \ | |
83 | { UPDATE_1_CHECK; m += m; i += m; } | |
84 | ||
85 | ||
86 | #define kNumPosBitsMax 4 | |
87 | #define kNumPosStatesMax (1 << kNumPosBitsMax) | |
88 | ||
89 | #define kLenNumLowBits 3 | |
90 | #define kLenNumLowSymbols (1 << kLenNumLowBits) | |
91 | #define kLenNumHighBits 8 | |
92 | #define kLenNumHighSymbols (1 << kLenNumHighBits) | |
93 | ||
94 | #define LenLow 0 | |
95 | #define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) | |
96 | #define kNumLenProbs (LenHigh + kLenNumHighSymbols) | |
97 | ||
98 | #define LenChoice LenLow | |
99 | #define LenChoice2 (LenLow + (1 << kLenNumLowBits)) | |
100 | ||
101 | #define kNumStates 12 | |
102 | #define kNumStates2 16 | |
103 | #define kNumLitStates 7 | |
104 | ||
105 | #define kStartPosModelIndex 4 | |
106 | #define kEndPosModelIndex 14 | |
107 | #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) | |
108 | ||
109 | #define kNumPosSlotBits 6 | |
110 | #define kNumLenToPosStates 4 | |
111 | ||
112 | #define kNumAlignBits 4 | |
113 | #define kAlignTableSize (1 << kNumAlignBits) | |
114 | ||
115 | #define kMatchMinLen 2 | |
116 | #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) | |
117 | ||
118 | /* External ASM code needs same CLzmaProb array layout. So don't change it. */ | |
119 | ||
120 | /* (probs_1664) is faster and better for code size at some platforms */ | |
121 | /* | |
122 | #ifdef MY_CPU_X86_OR_AMD64 | |
123 | */ | |
/*
  Layout of the probability array. All offsets below are relative to the
  pointer returned by GET_PROBS, which is kStartOffset entries past the start
  of the array, so the first tables have negative offsets. With
  kStartOffset = 1664 and the sizes defined above the values are:

    SpecPos -1664, IsRep0Long -1536, RepLenCoder -1280, LenCoder -768,
    IsMatch -256, Align 0, IsRep 16, IsRepG0 28, IsRepG1 40, IsRepG2 52,
    PosSlot 64, Literal 320, NUM_BASE_PROBS 1984

  The two #if checks below stop compilation if a change to the constants
  breaks "Align == 0" or the total of 1984.
*/
124 | #define kStartOffset 1664 | |
125 | #define GET_PROBS p->probs_1664 | |
126 | /* | |
127 | #define GET_PROBS p->probs + kStartOffset | |
128 | #else | |
129 | #define kStartOffset 0 | |
130 | #define GET_PROBS p->probs | |
131 | #endif | |
132 | */ | |
133 | ||
134 | #define SpecPos (-kStartOffset) | |
135 | #define IsRep0Long (SpecPos + kNumFullDistances) | |
136 | #define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) | |
137 | #define LenCoder (RepLenCoder + kNumLenProbs) | |
138 | #define IsMatch (LenCoder + kNumLenProbs) | |
139 | #define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) | |
140 | #define IsRep (Align + kAlignTableSize) | |
141 | #define IsRepG0 (IsRep + kNumStates) | |
142 | #define IsRepG1 (IsRepG0 + kNumStates) | |
143 | #define IsRepG2 (IsRepG1 + kNumStates) | |
144 | #define PosSlot (IsRepG2 + kNumStates) | |
145 | #define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) | |
146 | #define NUM_BASE_PROBS (Literal + kStartOffset) | |
147 | ||
148 | #if Align != 0 && kStartOffset != 0 | |
149 | #error Stop_Compiling_Bad_LZMA_kAlign | |
150 | #endif | |
151 | ||
152 | #if NUM_BASE_PROBS != 1984 | |
153 | #error Stop_Compiling_Bad_LZMA_PROBS | |
154 | #endif | |
155 | ||
156 | ||
157 | #define LZMA_LIT_SIZE 0x300 | |
158 | ||
159 | #define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) | |
160 | ||
161 | ||
162 | #define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) | |
163 | #define COMBINED_PS_STATE (posState + state) | |
164 | #define GET_LEN_STATE (posState) | |
165 | ||
166 | #define LZMA_DIC_MIN (1 << 12) | |
167 | ||
168 | /* | |
169 | p->remainLen : shows status of LZMA decoder: | |
170 | < kMatchSpecLenStart : normal remain | |
171 | = kMatchSpecLenStart : finished | |
172 | = kMatchSpecLenStart + 1 : need init range coder | |
173 | = kMatchSpecLenStart + 2 : need init range coder and state | |
174 | */ | |
175 | ||
176 | /* ---------- LZMA_DECODE_REAL ---------- */ | |
177 | /* | |
178 | LzmaDec_DecodeReal_3() can be implemented in external ASM file. | |
179 | 3 - is the code compatibility version of that function for check at link time. | |
180 | */ | |
181 | ||
182 | #define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 | |
183 | ||
184 | /* | |
185 | LZMA_DECODE_REAL() | |
186 | In: | |
187 | RangeCoder is normalized | |
188 | if (p->dicPos == limit) | |
189 | { | |
190 | LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. | |
191 | So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol | |
192 | is not END_OF_PAYLOAD_MARKER, then function returns error code. | |
193 | } | |
194 | ||
195 | Processing: | |
196 | first LZMA symbol will be decoded in any case | |
197 | All checks for limits are at the end of main loop, | |
198 | It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), | |
199 | RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. | |
200 | ||
201 | Out: | |
202 | RangeCoder is normalized | |
203 | Result: | |
204 | SZ_OK - OK | |
205 | SZ_ERROR_DATA - Error | |
206 | p->remainLen: | |
207 | < kMatchSpecLenStart : normal remain | |
208 | = kMatchSpecLenStart : finished | |
209 | */ | |
210 | ||
211 | ||
/*
 * LZMA_DECODE_REAL (expands to LzmaDec_DecodeReal_3).
 *
 * With _LZMA_DEC_OPT defined only the prototype is emitted here; otherwise
 * the C implementation below is compiled as a static function.
 *
 * p        - decoder state; the fields read at the top are cached in locals
 * limit    - dictionary position at which decoding must stop
 * bufLimit - input position at or after which the main loop stops
 *
 * Returns SZ_OK on the normal exit path and SZ_ERROR_DATA for an invalid
 * match distance or a match with no output room left. On the SZ_ERROR_DATA
 * paths only p->dicPos is written back; the other cached fields are not.
 */
212 | #ifdef _LZMA_DEC_OPT | |
213 | ||
214 | int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); | |
215 | ||
216 | #else | |
217 | ||
218 | static | |
219 | int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) | |
220 | { | |
/* Cache the decoder state in locals; it is stored back after the loop. */
221 | CLzmaProb *probs = GET_PROBS; | |
222 | unsigned state = (unsigned)p->state; | |
223 | UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; | |
224 | unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; | |
225 | unsigned lc = p->prop.lc; | |
226 | unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); | |
227 | ||
228 | Byte *dic = p->dic; | |
229 | SizeT dicBufSize = p->dicBufSize; | |
230 | SizeT dicPos = p->dicPos; | |
231 | ||
232 | UInt32 processedPos = p->processedPos; | |
233 | UInt32 checkDicSize = p->checkDicSize; | |
234 | unsigned len = 0; | |
235 | ||
236 | const Byte *buf = p->buf; | |
237 | UInt32 range = p->range; | |
238 | UInt32 code = p->code; | |
239 | ||
/* One iteration decodes one symbol: a literal, a one-byte rep0 copy, or a match. */
240 | do | |
241 | { | |
242 | CLzmaProb *prob; | |
243 | UInt32 bound; | |
244 | unsigned ttt; | |
245 | unsigned posState = CALC_POS_STATE(processedPos, pbMask); | |
246 | ||
247 | prob = probs + IsMatch + COMBINED_PS_STATE; | |
248 | IF_BIT_0(prob) | |
249 | { | |
/* Literal: the probability context is chosen from the position and the
   previous output byte, skipped when nothing has been produced yet. */
250 | unsigned symbol; | |
251 | UPDATE_0(prob); | |
252 | prob = probs + Literal; | |
253 | if (processedPos != 0 || checkDicSize != 0) | |
254 | prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); | |
255 | processedPos++; | |
256 | ||
257 | if (state < kNumLitStates) | |
258 | { | |
259 | state -= (state < 4) ? state : 3; | |
260 | symbol = 1; | |
261 | #ifdef _LZMA_SIZE_OPT | |
262 | do { NORMAL_LITER_DEC } while (symbol < 0x100); | |
263 | #else | |
264 | NORMAL_LITER_DEC | |
265 | NORMAL_LITER_DEC | |
266 | NORMAL_LITER_DEC | |
267 | NORMAL_LITER_DEC | |
268 | NORMAL_LITER_DEC | |
269 | NORMAL_LITER_DEC | |
270 | NORMAL_LITER_DEC | |
271 | NORMAL_LITER_DEC | |
272 | #endif | |
273 | } | |
274 | else | |
275 | { | |
/* Literal following a match: the byte at distance rep0 is fed to
   MATCHED_LITER_DEC as matchByte. */
276 | unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; | |
277 | unsigned offs = 0x100; | |
278 | state -= (state < 10) ? 3 : 6; | |
279 | symbol = 1; | |
280 | #ifdef _LZMA_SIZE_OPT | |
281 | do | |
282 | { | |
283 | unsigned bit; | |
284 | CLzmaProb *probLit; | |
285 | MATCHED_LITER_DEC | |
286 | } | |
287 | while (symbol < 0x100); | |
288 | #else | |
289 | { | |
290 | unsigned bit; | |
291 | CLzmaProb *probLit; | |
292 | MATCHED_LITER_DEC | |
293 | MATCHED_LITER_DEC | |
294 | MATCHED_LITER_DEC | |
295 | MATCHED_LITER_DEC | |
296 | MATCHED_LITER_DEC | |
297 | MATCHED_LITER_DEC | |
298 | MATCHED_LITER_DEC | |
299 | MATCHED_LITER_DEC | |
300 | } | |
301 | #endif | |
302 | } | |
303 | ||
304 | dic[dicPos++] = (Byte)symbol; | |
305 | continue; | |
306 | } | |
307 | ||
308 | { | |
309 | UPDATE_1(prob); | |
310 | prob = probs + IsRep + state; | |
311 | IF_BIT_0(prob) | |
312 | { | |
/* New match: state is bumped by kNumStates so the distance-decoding
   branch below (state >= kNumStates) is taken after the length. */
313 | UPDATE_0(prob); | |
314 | state += kNumStates; | |
315 | prob = probs + LenCoder; | |
316 | } | |
317 | else | |
318 | { | |
319 | UPDATE_1(prob); | |
320 | /* | |
321 | // that case was checked before with kBadRepCode | |
322 | if (checkDicSize == 0 && processedPos == 0) | |
323 | return SZ_ERROR_DATA; | |
324 | */ | |
325 | prob = probs + IsRepG0 + state; | |
326 | IF_BIT_0(prob) | |
327 | { | |
328 | UPDATE_0(prob); | |
329 | prob = probs + IsRep0Long + COMBINED_PS_STATE; | |
330 | IF_BIT_0(prob) | |
331 | { | |
/* Short rep: copy a single byte from distance rep0 and continue. */
332 | UPDATE_0(prob); | |
333 | dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; | |
334 | dicPos++; | |
335 | processedPos++; | |
336 | state = state < kNumLitStates ? 9 : 11; | |
337 | continue; | |
338 | } | |
339 | UPDATE_1(prob); | |
340 | } | |
341 | else | |
342 | { | |
/* Rep match with rep1, rep2 or rep3: the chosen distance becomes rep0
   and the ones in front of it shift down by one slot. */
343 | UInt32 distance; | |
344 | UPDATE_1(prob); | |
345 | prob = probs + IsRepG1 + state; | |
346 | IF_BIT_0(prob) | |
347 | { | |
348 | UPDATE_0(prob); | |
349 | distance = rep1; | |
350 | } | |
351 | else | |
352 | { | |
353 | UPDATE_1(prob); | |
354 | prob = probs + IsRepG2 + state; | |
355 | IF_BIT_0(prob) | |
356 | { | |
357 | UPDATE_0(prob); | |
358 | distance = rep2; | |
359 | } | |
360 | else | |
361 | { | |
362 | UPDATE_1(prob); | |
363 | distance = rep3; | |
364 | rep3 = rep2; | |
365 | } | |
366 | rep2 = rep1; | |
367 | } | |
368 | rep1 = rep0; | |
369 | rep0 = distance; | |
370 | } | |
371 | state = state < kNumLitStates ? 8 : 11; | |
372 | prob = probs + RepLenCoder; | |
373 | } | |
374 | ||
/* Decode the match length into len using the length coder selected above. */
375 | #ifdef _LZMA_SIZE_OPT | |
376 | { | |
377 | unsigned lim, offset; | |
378 | CLzmaProb *probLen = prob + LenChoice; | |
379 | IF_BIT_0(probLen) | |
380 | { | |
381 | UPDATE_0(probLen); | |
382 | probLen = prob + LenLow + GET_LEN_STATE; | |
383 | offset = 0; | |
384 | lim = (1 << kLenNumLowBits); | |
385 | } | |
386 | else | |
387 | { | |
388 | UPDATE_1(probLen); | |
389 | probLen = prob + LenChoice2; | |
390 | IF_BIT_0(probLen) | |
391 | { | |
392 | UPDATE_0(probLen); | |
393 | probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); | |
394 | offset = kLenNumLowSymbols; | |
395 | lim = (1 << kLenNumLowBits); | |
396 | } | |
397 | else | |
398 | { | |
399 | UPDATE_1(probLen); | |
400 | probLen = prob + LenHigh; | |
401 | offset = kLenNumLowSymbols * 2; | |
402 | lim = (1 << kLenNumHighBits); | |
403 | } | |
404 | } | |
405 | TREE_DECODE(probLen, lim, len); | |
406 | len += offset; | |
407 | } | |
408 | #else | |
409 | { | |
410 | CLzmaProb *probLen = prob + LenChoice; | |
411 | IF_BIT_0(probLen) | |
412 | { | |
413 | UPDATE_0(probLen); | |
414 | probLen = prob + LenLow + GET_LEN_STATE; | |
415 | len = 1; | |
416 | TREE_GET_BIT(probLen, len); | |
417 | TREE_GET_BIT(probLen, len); | |
418 | TREE_GET_BIT(probLen, len); | |
419 | len -= 8; | |
420 | } | |
421 | else | |
422 | { | |
423 | UPDATE_1(probLen); | |
424 | probLen = prob + LenChoice2; | |
425 | IF_BIT_0(probLen) | |
426 | { | |
427 | UPDATE_0(probLen); | |
428 | probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); | |
429 | len = 1; | |
430 | TREE_GET_BIT(probLen, len); | |
431 | TREE_GET_BIT(probLen, len); | |
432 | TREE_GET_BIT(probLen, len); | |
433 | } | |
434 | else | |
435 | { | |
436 | UPDATE_1(probLen); | |
437 | probLen = prob + LenHigh; | |
438 | TREE_DECODE(probLen, (1 << kLenNumHighBits), len); | |
439 | len += kLenNumLowSymbols * 2; | |
440 | } | |
441 | } | |
442 | } | |
443 | #endif | |
444 | ||
/* Only a new (non-rep) match carries an explicit distance. */
445 | if (state >= kNumStates) | |
446 | { | |
447 | UInt32 distance; | |
448 | prob = probs + PosSlot + | |
449 | ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); | |
450 | TREE_6_DECODE(prob, distance); | |
451 | if (distance >= kStartPosModelIndex) | |
452 | { | |
453 | unsigned posSlot = (unsigned)distance; | |
454 | unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); | |
455 | distance = (2 | (distance & 1)); | |
456 | if (posSlot < kEndPosModelIndex) | |
457 | { | |
458 | distance <<= numDirectBits; | |
459 | prob = probs + SpecPos; | |
460 | { | |
461 | UInt32 m = 1; | |
462 | distance++; | |
463 | do | |
464 | { | |
465 | REV_BIT_VAR(prob, distance, m); | |
466 | } | |
467 | while (--numDirectBits); | |
468 | distance -= m; | |
469 | } | |
470 | } | |
471 | else | |
472 | { | |
/* High bits are read directly from the range coder with no probability
   model; the last kNumAlignBits bits come from the Align table. */
473 | numDirectBits -= kNumAlignBits; | |
474 | do | |
475 | { | |
476 | NORMALIZE | |
477 | range >>= 1; | |
478 | ||
479 | { | |
480 | UInt32 t; | |
481 | code -= range; | |
482 | t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ | |
483 | distance = (distance << 1) + (t + 1); | |
484 | code += range & t; | |
485 | } | |
486 | /* | |
487 | distance <<= 1; | |
488 | if (code >= range) | |
489 | { | |
490 | code -= range; | |
491 | distance |= 1; | |
492 | } | |
493 | */ | |
494 | } | |
495 | while (--numDirectBits); | |
496 | prob = probs + Align; | |
497 | distance <<= kNumAlignBits; | |
498 | { | |
499 | unsigned i = 1; | |
500 | REV_BIT_CONST(prob, i, 1); | |
501 | REV_BIT_CONST(prob, i, 2); | |
502 | REV_BIT_CONST(prob, i, 4); | |
503 | REV_BIT_LAST (prob, i, 8); | |
504 | distance |= i; | |
505 | } | |
/* Distance 0xFFFFFFFF leaves the loop with len = kMatchSpecLenStart,
   which is stored into p->remainLen below (the "finished" value). */
506 | if (distance == (UInt32)0xFFFFFFFF) | |
507 | { | |
508 | len = kMatchSpecLenStart; | |
509 | state -= kNumStates; | |
510 | break; | |
511 | } | |
512 | } | |
513 | } | |
514 | ||
515 | rep3 = rep2; | |
516 | rep2 = rep1; | |
517 | rep1 = rep0; | |
518 | rep0 = distance + 1; | |
519 | state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; | |
/* Reject a distance that reaches back further than processedPos (or than
   checkDicSize once that is non-zero). Only dicPos is stored before returning. */
520 | if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) | |
521 | { | |
522 | p->dicPos = dicPos; | |
523 | return SZ_ERROR_DATA; | |
524 | } | |
525 | } | |
526 | ||
527 | len += kMatchMinLen; | |
528 | ||
529 | { | |
530 | SizeT rem; | |
531 | unsigned curLen; | |
532 | SizeT pos; | |
533 | ||
/* A match with no output room left before limit is treated as a data error. */
534 | if ((rem = limit - dicPos) == 0) | |
535 | { | |
536 | p->dicPos = dicPos; | |
537 | return SZ_ERROR_DATA; | |
538 | } | |
539 | ||
/* Copy at most rem bytes now; whatever is left stays in len and ends up
   in p->remainLen after the loop. */
540 | curLen = ((rem < len) ? (unsigned)rem : len); | |
541 | pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); | |
542 | ||
543 | processedPos += (UInt32)curLen; | |
544 | ||
545 | len -= curLen; | |
546 | if (curLen <= dicBufSize - pos) | |
547 | { | |
/* Source range does not cross the end of the buffer: plain forward copy. */
548 | Byte *dest = dic + dicPos; | |
549 | ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; | |
550 | const Byte *lim = dest + curLen; | |
551 | dicPos += (SizeT)curLen; | |
552 | do | |
553 | *(dest) = (Byte)*(dest + src); | |
554 | while (++dest != lim); | |
555 | } | |
556 | else | |
557 | { | |
/* Source range wraps: copy byte by byte, resetting pos to 0 at dicBufSize. */
558 | do | |
559 | { | |
560 | dic[dicPos++] = dic[pos]; | |
561 | if (++pos == dicBufSize) | |
562 | pos = 0; | |
563 | } | |
564 | while (--curLen != 0); | |
565 | } | |
566 | } | |
567 | } | |
568 | } | |
569 | while (dicPos < limit && buf < bufLimit); | |
570 | ||
571 | NORMALIZE; | |
572 | ||
/* Normal exit: write the cached state back into *p. */
573 | p->buf = buf; | |
574 | p->range = range; | |
575 | p->code = code; | |
576 | p->remainLen = (UInt32)len; | |
577 | p->dicPos = dicPos; | |
578 | p->processedPos = processedPos; | |
579 | p->reps[0] = rep0; | |
580 | p->reps[1] = rep1; | |
581 | p->reps[2] = rep2; | |
582 | p->reps[3] = rep3; | |
583 | p->state = (UInt32)state; | |
584 | ||
585 | return SZ_OK; | |
586 | } | |
587 | #endif | |
588 | ||
589 | static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) | |
590 | { | |
591 | if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) | |
592 | { | |
593 | Byte *dic = p->dic; | |
594 | SizeT dicPos = p->dicPos; | |
595 | SizeT dicBufSize = p->dicBufSize; | |
596 | unsigned len = (unsigned)p->remainLen; | |
597 | SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ | |
598 | SizeT rem = limit - dicPos; | |
599 | if (rem < len) | |
600 | len = (unsigned)(rem); | |
601 | ||
602 | if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) | |
603 | p->checkDicSize = p->prop.dicSize; | |
604 | ||
605 | p->processedPos += (UInt32)len; | |
606 | p->remainLen -= (UInt32)len; | |
607 | while (len != 0) | |
608 | { | |
609 | len--; | |
610 | dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; | |
611 | dicPos++; | |
612 | } | |
613 | p->dicPos = dicPos; | |
614 | } | |
615 | } | |
616 | ||
617 | ||
/* kBadRepCode is derived from the initial range value (kRange0) and the
   bit-model constants. LzmaDec_DecodeReal2() rejects a stream whose initial
   code is >= kBadRepCode while processedPos is still 0. The #if pins the
   expected numeric value at compile time. */
618 | #define kRange0 0xFFFFFFFF | |
619 | #define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) | |
620 | #define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) | |
621 | #if kBadRepCode != (0xC0000000 - 0x400) | |
622 | #error Stop_Compiling_Bad_LZMA_Check | |
623 | #endif | |
624 | ||
625 | static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit, SizeT memlimit) | |
626 | { | |
627 | do | |
628 | { | |
629 | SizeT limit2 = limit; | |
630 | if (p->checkDicSize == 0) | |
631 | { | |
632 | UInt32 rem = p->prop.dicSize - p->processedPos; | |
633 | if (limit - p->dicPos > rem) { | |
634 | if (p->dicBufSize < p->prop.dicSize) { | |
635 | p->dicBufSize = p->prop.dicSize; | |
636 | } | |
637 | if (p->dicBufSize > memlimit) { | |
638 | return SZ_ERROR_MEM; | |
639 | } | |
640 | Byte *tmp = realloc(p->dic, p->dicBufSize); | |
641 | if (!tmp) { | |
642 | return SZ_ERROR_MEM; | |
643 | } | |
644 | p->dic = tmp; | |
645 | limit2 = p->dicPos + rem; | |
646 | } | |
647 | ||
648 | if (p->processedPos == 0) | |
649 | if (p->code >= kBadRepCode) | |
650 | return SZ_ERROR_DATA; | |
651 | } | |
652 | ||
653 | RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); | |
654 | ||
655 | if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) | |
656 | p->checkDicSize = p->prop.dicSize; | |
657 | ||
658 | LzmaDec_WriteRem(p, limit); | |
659 | } | |
660 | while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); | |
661 | ||
662 | return 0; | |
663 | } | |
664 | ||
/* Result of LzmaDec_TryDummy(): the kind of symbol found at the current
   input position, or DUMMY_ERROR. */
665 | typedef enum | |
666 | { | |
667 | DUMMY_ERROR, /* unexpected end of input stream */ | |
668 | DUMMY_LIT, | |
669 | DUMMY_MATCH, | |
670 | DUMMY_REP | |
671 | } ELzmaDummy; | |
672 | ||
/*
 * LzmaDec_TryDummy - classify the next symbol without changing the decoder.
 *
 * Walks the same decision tree as the real decoder for a single symbol,
 * working on local copies of range/code; *p is const and is only read.
 *
 * p      - decoder state (read-only)
 * buf    - input bytes to examine
 * inSize - number of bytes available at buf
 *
 * Returns DUMMY_LIT, DUMMY_MATCH or DUMMY_REP for the symbol kind.
 * NOTE(review): the *_CHECK macros are defined outside this excerpt;
 * presumably they return DUMMY_ERROR when buf reaches bufLimit - confirm.
 */
673 | static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) | |
674 | { | |
675 | UInt32 range = p->range; | |
676 | UInt32 code = p->code; | |
677 | const Byte *bufLimit = buf + inSize; | |
678 | const CLzmaProb *probs = GET_PROBS; | |
679 | unsigned state = (unsigned)p->state; | |
680 | ELzmaDummy res; | |
681 | ||
682 | { | |
683 | const CLzmaProb *prob; | |
684 | UInt32 bound; | |
685 | unsigned ttt; | |
686 | unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); | |
687 | ||
688 | prob = probs + IsMatch + COMBINED_PS_STATE; | |
689 | IF_BIT_0_CHECK(prob) | |
690 | { | |
691 | UPDATE_0_CHECK | |
692 | ||
693 | /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ | |
694 | ||
/* Literal: the context is chosen from the position and the previous output byte. */
695 | prob = probs + Literal; | |
696 | if (p->checkDicSize != 0 || p->processedPos != 0) | |
697 | prob += ((UInt32)LZMA_LIT_SIZE * | |
698 | ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + | |
699 | (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); | |
700 | ||
701 | if (state < kNumLitStates) | |
702 | { | |
703 | unsigned symbol = 1; | |
704 | do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); | |
705 | } | |
706 | else | |
707 | { | |
/* Literal after a match: the byte at distance reps[0] steers the choice
   of probability. */
708 | unsigned matchByte = p->dic[p->dicPos - p->reps[0] + | |
709 | (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; | |
710 | unsigned offs = 0x100; | |
711 | unsigned symbol = 1; | |
712 | do | |
713 | { | |
714 | unsigned bit; | |
715 | const CLzmaProb *probLit; | |
716 | matchByte += matchByte; | |
717 | bit = offs; | |
718 | offs &= matchByte; | |
719 | probLit = prob + (offs + bit + symbol); | |
720 | GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) | |
721 | } | |
722 | while (symbol < 0x100); | |
723 | } | |
724 | res = DUMMY_LIT; | |
725 | } | |
726 | else | |
727 | { | |
728 | unsigned len; | |
729 | UPDATE_1_CHECK; | |
730 | ||
731 | prob = probs + IsRep + state; | |
732 | IF_BIT_0_CHECK(prob) | |
733 | { | |
/* New match. From here on state is only a flag: 0 makes the
   "state < 4" test below walk the distance bits as well. */
734 | UPDATE_0_CHECK; | |
735 | state = 0; | |
736 | prob = probs + LenCoder; | |
737 | res = DUMMY_MATCH; | |
738 | } | |
739 | else | |
740 | { | |
741 | UPDATE_1_CHECK; | |
742 | res = DUMMY_REP; | |
743 | prob = probs + IsRepG0 + state; | |
744 | IF_BIT_0_CHECK(prob) | |
745 | { | |
746 | UPDATE_0_CHECK; | |
747 | prob = probs + IsRep0Long + COMBINED_PS_STATE; | |
748 | IF_BIT_0_CHECK(prob) | |
749 | { | |
/* Short rep: no length follows, so the symbol ends here. */
750 | UPDATE_0_CHECK; | |
751 | NORMALIZE_CHECK; | |
752 | return DUMMY_REP; | |
753 | } | |
754 | else | |
755 | { | |
756 | UPDATE_1_CHECK; | |
757 | } | |
758 | } | |
759 | else | |
760 | { | |
761 | UPDATE_1_CHECK; | |
762 | prob = probs + IsRepG1 + state; | |
763 | IF_BIT_0_CHECK(prob) | |
764 | { | |
765 | UPDATE_0_CHECK; | |
766 | } | |
767 | else | |
768 | { | |
769 | UPDATE_1_CHECK; | |
770 | prob = probs + IsRepG2 + state; | |
771 | IF_BIT_0_CHECK(prob) | |
772 | { | |
773 | UPDATE_0_CHECK; | |
774 | } | |
775 | else | |
776 | { | |
777 | UPDATE_1_CHECK; | |
778 | } | |
779 | } | |
780 | } | |
/* Rep match: state = kNumStates, tested by "state < 4" below.
   NOTE(review): kNumStates is defined outside this excerpt; presumably it
   is >= 4, so no distance bits are consumed for a rep - confirm. */
781 | state = kNumStates; | |
782 | prob = probs + RepLenCoder; | |
783 | } | |
/* Walk the length bits for either kind of match. */
784 | { | |
785 | unsigned limit, offset; | |
786 | const CLzmaProb *probLen = prob + LenChoice; | |
787 | IF_BIT_0_CHECK(probLen) | |
788 | { | |
789 | UPDATE_0_CHECK; | |
790 | probLen = prob + LenLow + GET_LEN_STATE; | |
791 | offset = 0; | |
792 | limit = 1 << kLenNumLowBits; | |
793 | } | |
794 | else | |
795 | { | |
796 | UPDATE_1_CHECK; | |
797 | probLen = prob + LenChoice2; | |
798 | IF_BIT_0_CHECK(probLen) | |
799 | { | |
800 | UPDATE_0_CHECK; | |
801 | probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); | |
802 | offset = kLenNumLowSymbols; | |
803 | limit = 1 << kLenNumLowBits; | |
804 | } | |
805 | else | |
806 | { | |
807 | UPDATE_1_CHECK; | |
808 | probLen = prob + LenHigh; | |
809 | offset = kLenNumLowSymbols * 2; | |
810 | limit = 1 << kLenNumHighBits; | |
811 | } | |
812 | } | |
813 | TREE_DECODE_CHECK(probLen, limit, len); | |
814 | len += offset; | |
815 | } | |
816 | ||
/* Distance bits: the bits are consumed but no distance value is assembled. */
817 | if (state < 4) | |
818 | { | |
819 | unsigned posSlot; | |
820 | prob = probs + PosSlot + | |
821 | ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << | |
822 | kNumPosSlotBits); | |
823 | TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); | |
824 | if (posSlot >= kStartPosModelIndex) | |
825 | { | |
826 | unsigned numDirectBits = ((posSlot >> 1) - 1); | |
827 | ||
828 | /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ | |
829 | ||
830 | if (posSlot < kEndPosModelIndex) | |
831 | { | |
832 | prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); | |
833 | } | |
834 | else | |
835 | { | |
836 | numDirectBits -= kNumAlignBits; | |
837 | do | |
838 | { | |
839 | NORMALIZE_CHECK | |
840 | range >>= 1; | |
841 | code -= range & (((code - range) >> 31) - 1); | |
842 | /* if (code >= range) code -= range; */ | |
843 | } | |
844 | while (--numDirectBits); | |
845 | prob = probs + Align; | |
846 | numDirectBits = kNumAlignBits; | |
847 | } | |
848 | { | |
849 | unsigned i = 1; | |
850 | unsigned m = 1; | |
851 | do | |
852 | { | |
853 | REV_BIT_CHECK(prob, i, m); | |
854 | } | |
855 | while (--numDirectBits); | |
856 | } | |
857 | } | |
858 | } | |
859 | } | |
860 | } | |
861 | NORMALIZE_CHECK; | |
862 | return res; | |
863 | } | |
864 | ||
865 | ||
866 | static void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) | |
867 | { | |
868 | p->remainLen = kMatchSpecLenStart + 1; | |
869 | p->tempBufSize = 0; | |
870 | ||
871 | if (initDic) | |
872 | { | |
873 | p->processedPos = 0; | |
874 | p->checkDicSize = 0; | |
875 | p->remainLen = kMatchSpecLenStart + 2; | |
876 | } | |
877 | if (initState) | |
878 | p->remainLen = kMatchSpecLenStart + 2; | |
879 | } | |
880 | ||
881 | void LzmaDec_Init(CLzmaDec *p) | |
882 | { | |
883 | p->dicPos = 0; | |
884 | LzmaDec_InitDicAndState(p, True, True); | |
885 | } | |
886 | ||
887 | ||
888 | SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, | |
889 | ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit) | |
890 | { | |
891 | SizeT inSize = *srcLen; | |
892 | (*srcLen) = 0; | |
893 | ||
894 | *status = LZMA_STATUS_NOT_SPECIFIED; | |
895 | ||
896 | if (p->remainLen > kMatchSpecLenStart) | |
897 | { | |
898 | for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) | |
899 | p->tempBuf[p->tempBufSize++] = *src++; | |
900 | if (p->tempBufSize != 0 && p->tempBuf[0] != 0) | |
901 | return SZ_ERROR_DATA; | |
902 | if (p->tempBufSize < RC_INIT_SIZE) | |
903 | { | |
904 | *status = LZMA_STATUS_NEEDS_MORE_INPUT; | |
905 | return SZ_OK; | |
906 | } | |
907 | p->code = | |
908 | ((UInt32)p->tempBuf[1] << 24) | |
909 | | ((UInt32)p->tempBuf[2] << 16) | |
910 | | ((UInt32)p->tempBuf[3] << 8) | |
911 | | ((UInt32)p->tempBuf[4]); | |
912 | p->range = 0xFFFFFFFF; | |
913 | p->tempBufSize = 0; | |
914 | ||
915 | if (p->remainLen > kMatchSpecLenStart + 1) | |
916 | { | |
917 | SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); | |
918 | SizeT i; | |
919 | CLzmaProb *probs = p->probs; | |
920 | for (i = 0; i < numProbs; i++) | |
921 | probs[i] = kBitModelTotal >> 1; | |
922 | p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; | |
923 | p->state = 0; | |
924 | } | |
925 | ||
926 | p->remainLen = 0; | |
927 | } | |
928 | ||
929 | LzmaDec_WriteRem(p, dicLimit); | |
930 | ||
931 | while (p->remainLen != kMatchSpecLenStart) | |
932 | { | |
933 | int checkEndMarkNow = 0; | |
934 | ||
935 | if (p->dicPos >= dicLimit) | |
936 | { | |
937 | if (p->remainLen == 0 && p->code == 0) | |
938 | { | |
939 | *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; | |
940 | return SZ_OK; | |
941 | } | |
942 | if (finishMode == LZMA_FINISH_ANY) | |
943 | { | |
944 | *status = LZMA_STATUS_NOT_FINISHED; | |
945 | return SZ_OK; | |
946 | } | |
947 | if (p->remainLen != 0) | |
948 | { | |
949 | *status = LZMA_STATUS_NOT_FINISHED; | |
950 | return SZ_ERROR_DATA; | |
951 | } | |
952 | checkEndMarkNow = 1; | |
953 | } | |
954 | ||
955 | if (p->tempBufSize == 0) | |
956 | { | |
957 | SizeT processed; | |
958 | const Byte *bufLimit; | |
959 | if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) | |
960 | { | |
961 | int dummyRes = LzmaDec_TryDummy(p, src, inSize); | |
962 | if (dummyRes == DUMMY_ERROR) | |
963 | { | |
964 | memcpy(p->tempBuf, src, inSize); | |
965 | p->tempBufSize = (unsigned)inSize; | |
966 | (*srcLen) += inSize; | |
967 | *status = LZMA_STATUS_NEEDS_MORE_INPUT; | |
968 | return SZ_OK; | |
969 | } | |
970 | if (checkEndMarkNow && dummyRes != DUMMY_MATCH) | |
971 | { | |
972 | *status = LZMA_STATUS_NOT_FINISHED; | |
973 | return SZ_ERROR_DATA; | |
974 | } | |
975 | bufLimit = src; | |
976 | } | |
977 | else | |
978 | bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; | |
979 | p->buf = src; | |
980 | if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit, memlimit) != 0) | |
981 | return SZ_ERROR_DATA; | |
982 | processed = (SizeT)(p->buf - src); | |
983 | (*srcLen) += processed; | |
984 | src += processed; | |
985 | inSize -= processed; | |
986 | } | |
987 | else | |
988 | { | |
989 | unsigned rem = p->tempBufSize, lookAhead = 0; | |
990 | while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) | |
991 | p->tempBuf[rem++] = src[lookAhead++]; | |
992 | p->tempBufSize = rem; | |
993 | if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) | |
994 | { | |
995 | int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem); | |
996 | if (dummyRes == DUMMY_ERROR) | |
997 | { | |
998 | (*srcLen) += (SizeT)lookAhead; | |
999 | *status = LZMA_STATUS_NEEDS_MORE_INPUT; | |
1000 | return SZ_OK; | |
1001 | } | |
1002 | if (checkEndMarkNow && dummyRes != DUMMY_MATCH) | |
1003 | { | |
1004 | *status = LZMA_STATUS_NOT_FINISHED; | |
1005 | return SZ_ERROR_DATA; | |
1006 | } | |
1007 | } | |
1008 | p->buf = p->tempBuf; | |
1009 | if (LzmaDec_DecodeReal2(p, dicLimit, p->buf, memlimit) != 0) | |
1010 | return SZ_ERROR_DATA; | |
1011 | ||
1012 | { | |
1013 | unsigned kkk = (unsigned)(p->buf - p->tempBuf); | |
1014 | if (rem < kkk) | |
1015 | return SZ_ERROR_FAIL; /* some internal error */ | |
1016 | rem -= kkk; | |
1017 | if (lookAhead < rem) | |
1018 | return SZ_ERROR_FAIL; /* some internal error */ | |
1019 | lookAhead -= rem; | |
1020 | } | |
1021 | (*srcLen) += (SizeT)lookAhead; | |
1022 | src += lookAhead; | |
1023 | inSize -= (SizeT)lookAhead; | |
1024 | p->tempBufSize = 0; | |
1025 | } | |
1026 | } | |
1027 | ||
1028 | if (p->code != 0) | |
1029 | return SZ_ERROR_DATA; | |
1030 | *status = LZMA_STATUS_FINISHED_WITH_MARK; | |
1031 | return SZ_OK; | |
1032 | } | |
1033 | ||
1034 | ||
1035 | SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit) | |
1036 | { | |
1037 | SizeT outSize = *destLen; | |
1038 | SizeT inSize = *srcLen; | |
1039 | *srcLen = *destLen = 0; | |
1040 | for (;;) | |
1041 | { | |
1042 | SizeT inSizeCur = inSize, outSizeCur, dicPos; | |
1043 | ELzmaFinishMode curFinishMode; | |
1044 | SRes res; | |
1045 | if (p->dicPos == p->dicBufSize) { | |
1046 | if (p->dicBufSize < p->prop.dicSize) { | |
1047 | if (p->dicBufSize < memlimit) { | |
1048 | p->dicBufSize = p->dicBufSize << 2; | |
1049 | if (p->dicBufSize > memlimit) { | |
1050 | p->dicBufSize = memlimit; | |
1051 | } | |
1052 | if (p->dicBufSize > p->prop.dicSize) { | |
1053 | p->dicBufSize = p->prop.dicSize; | |
1054 | } | |
1055 | Byte *tmp = realloc(p->dic, p->dicBufSize); | |
1056 | if (!tmp) { | |
1057 | return SZ_ERROR_MEM; | |
1058 | } | |
1059 | p->dic = tmp; | |
1060 | } else { | |
1061 | return SZ_ERROR_MEM; | |
1062 | } | |
1063 | } else { | |
1064 | p->dicPos = 0; | |
1065 | } | |
1066 | } | |
1067 | dicPos = p->dicPos; | |
1068 | if (outSize > p->dicBufSize - dicPos) | |
1069 | { | |
1070 | outSizeCur = p->dicBufSize; | |
1071 | curFinishMode = LZMA_FINISH_ANY; | |
1072 | } | |
1073 | else | |
1074 | { | |
1075 | outSizeCur = dicPos + outSize; | |
1076 | curFinishMode = finishMode; | |
1077 | } | |
1078 | ||
1079 | res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status, memlimit); | |
1080 | src += inSizeCur; | |
1081 | inSize -= inSizeCur; | |
1082 | *srcLen += inSizeCur; | |
1083 | outSizeCur = p->dicPos - dicPos; | |
1084 | memcpy(dest, p->dic + dicPos, outSizeCur); | |
1085 | dest += outSizeCur; | |
1086 | outSize -= outSizeCur; | |
1087 | *destLen += outSizeCur; | |
1088 | if (res != 0) | |
1089 | return res; | |
1090 | if (outSizeCur == 0 || outSize == 0) | |
1091 | return SZ_OK; | |
1092 | } | |
1093 | } | |
1094 | ||
1095 | void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) | |
1096 | { | |
1097 | ISzAlloc_Free(alloc, p->probs); | |
1098 | p->probs = NULL; | |
1099 | } | |
1100 | ||
1101 | static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) | |
1102 | { | |
1103 | ISzAlloc_Free(alloc, p->dic); | |
1104 | p->dic = NULL; | |
1105 | } | |
1106 | ||
/* Releases both buffers owned by the decoder - the probability array and the
   dictionary - through `alloc`, leaving p->probs and p->dic set to NULL. */
1107 | void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) | |
1108 | { | |
1109 | LzmaDec_FreeProbs(p, alloc); | |
1110 | LzmaDec_FreeDict(p, alloc); | |
1111 | } | |
1112 | ||
1113 | SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) | |
1114 | { | |
1115 | UInt32 dicSize; | |
1116 | Byte d; | |
1117 | ||
1118 | if (size < LZMA_PROPS_SIZE) | |
1119 | return SZ_ERROR_UNSUPPORTED; | |
1120 | else | |
1121 | dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); | |
1122 | ||
1123 | if (dicSize < LZMA_DIC_MIN) | |
1124 | dicSize = LZMA_DIC_MIN; | |
1125 | p->dicSize = dicSize; | |
1126 | ||
1127 | d = data[0]; | |
1128 | if (d >= (9 * 5 * 5)) | |
1129 | return SZ_ERROR_UNSUPPORTED; | |
1130 | ||
1131 | p->lc = (Byte)(d % 9); | |
1132 | d /= 9; | |
1133 | p->pb = (Byte)(d / 5); | |
1134 | p->lp = (Byte)(d % 5); | |
1135 | p->_pad_ = 0; | |
1136 | ||
1137 | return SZ_OK; | |
1138 | } | |
1139 | ||
1140 | static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) | |
1141 | { | |
1142 | UInt32 numProbs = LzmaProps_GetNumProbs(propNew); | |
1143 | if (!p->probs || numProbs != p->numProbs) | |
1144 | { | |
1145 | LzmaDec_FreeProbs(p, alloc); | |
1146 | p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); | |
1147 | if (!p->probs) | |
1148 | return SZ_ERROR_MEM; | |
1149 | p->probs_1664 = p->probs + 1664; | |
1150 | p->numProbs = numProbs; | |
1151 | } | |
1152 | return SZ_OK; | |
1153 | } | |
1154 | ||
1155 | SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) | |
1156 | { | |
1157 | CLzmaProps propNew; | |
1158 | RINOK(LzmaProps_Decode(&propNew, props, propsSize)); | |
1159 | RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); | |
1160 | p->prop = propNew; | |
1161 | return SZ_OK; | |
1162 | } | |
1163 | ||
1164 | SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) | |
1165 | { | |
1166 | CLzmaProps propNew; | |
1167 | SizeT dicBufSize; | |
1168 | RINOK(LzmaProps_Decode(&propNew, props, propsSize)); | |
1169 | RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); | |
1170 | ||
1171 | { | |
1172 | UInt32 dictSize = propNew.dicSize; | |
1173 | SizeT mask = ((UInt32)1 << 12) - 1; | |
1174 | if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; | |
1175 | else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; | |
1176 | dicBufSize = ((SizeT)dictSize + mask) & ~mask; | |
1177 | if (dicBufSize < dictSize) | |
1178 | dicBufSize = dictSize; | |
1179 | } | |
1180 | if (dicBufSize > LZMA_DIC_MIN) { | |
1181 | dicBufSize = LZMA_DIC_MIN; | |
1182 | } | |
1183 | ||
1184 | if (!p->dic || dicBufSize != p->dicBufSize) | |
1185 | { | |
1186 | LzmaDec_FreeDict(p, alloc); | |
1187 | p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); | |
1188 | if (!p->dic) | |
1189 | { | |
1190 | LzmaDec_FreeProbs(p, alloc); | |
1191 | return SZ_ERROR_MEM; | |
1192 | } | |
1193 | } | |
1194 | p->dicBufSize = dicBufSize; | |
1195 | p->prop = propNew; | |
1196 | return SZ_OK; | |
1197 | } | |
1198 | ||
1199 | SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, | |
1200 | const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, | |
1201 | ELzmaStatus *status, ISzAllocPtr alloc) | |
1202 | { | |
1203 | CLzmaDec p; | |
1204 | SRes res; | |
1205 | SizeT outSize = *destLen, inSize = *srcLen; | |
1206 | *destLen = *srcLen = 0; | |
1207 | *status = LZMA_STATUS_NOT_SPECIFIED; | |
1208 | if (inSize < RC_INIT_SIZE) | |
1209 | return SZ_ERROR_INPUT_EOF; | |
1210 | LzmaDec_Construct(&p); | |
1211 | RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); | |
1212 | p.dic = dest; | |
1213 | p.dicBufSize = outSize; | |
1214 | LzmaDec_Init(&p); | |
1215 | *srcLen = inSize; | |
1216 | res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status, SIZE_MAX); | |
1217 | *destLen = p.dicPos; | |
1218 | if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) | |
1219 | res = SZ_ERROR_INPUT_EOF; | |
1220 | LzmaDec_FreeProbs(&p, alloc); | |
1221 | return res; | |
1222 | } |
0 | /* LzmaDec.h -- LZMA Decoder | |
1 | 2018-04-21 : Igor Pavlov : Public domain */ | |
2 | ||
3 | #ifndef __LZMA_DEC_H | |
4 | #define __LZMA_DEC_H | |
5 | ||
6 | #include "7zTypes.h" | |
7 | ||
8 | EXTERN_C_BEGIN | |
9 | ||
10 | /* #define _LZMA_PROB32 */ | |
11 | /* _LZMA_PROB32 can increase the speed on some CPUs, | |
12 | but memory usage for CLzmaDec::probs will be doubled in that case */ | |
13 | ||
14 | typedef | |
15 | #ifdef _LZMA_PROB32 | |
16 | UInt32 | |
17 | #else | |
18 | UInt16 | |
19 | #endif | |
20 | CLzmaProb; | |
21 | ||
22 | ||
23 | /* ---------- LZMA Properties ---------- */ | |
24 | ||
25 | #define LZMA_PROPS_SIZE 5 | |
26 | ||
27 | typedef struct _CLzmaProps /* Decoded LZMA stream properties; filled in by LzmaProps_Decode(). */ | |
28 | { | |
29 | Byte lc; /* NOTE(review): presumably the LZMA "literal context bits" setting -- confirm against LzmaProps_Decode */ | |
30 | Byte lp; /* NOTE(review): presumably the "literal position bits" setting -- confirm */ | |
31 | Byte pb; /* NOTE(review): presumably the "position bits" setting -- confirm */ | |
32 | Byte _pad_; /* NOTE(review): looks like explicit padding before the 32-bit field -- confirm */ | |
33 | UInt32 dicSize; /* dictionary size; LzmaDec_Allocate() derives its dictionary buffer size from this */ | |
34 | } CLzmaProps; | |
35 | ||
36 | /* LzmaProps_Decode - decodes properties | |
37 | Returns: | |
38 | SZ_OK | |
39 | SZ_ERROR_UNSUPPORTED - Unsupported properties | |
40 | */ | |
41 | ||
42 | SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); | |
43 | ||
44 | ||
45 | /* ---------- LZMA Decoder state ---------- */ | |
46 | ||
47 | /* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. | |
48 | Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ | |
49 | ||
50 | #define LZMA_REQUIRED_INPUT_MAX 20 | |
51 | ||
52 | typedef struct /* LZMA decoder state shared by the dictionary, buffer and one-call interfaces. */ | |
53 | { | |
54 | /* Don't change this structure. ASM code can use it. */ | |
55 | CLzmaProps prop; /* properties of the current stream; stored by LzmaDec_Allocate() */ | |
56 | CLzmaProb *probs; /* probability table; set to NULL by LzmaDec_Construct() */ | |
57 | CLzmaProb *probs_1664; /* NOTE(review): presumably a pointer into probs at a fixed offset -- confirm in LzmaDec.c */ | |
58 | Byte *dic; /* dictionary buffer: allocated by LzmaDec_Allocate(), or pointed at the caller's output buffer by LzmaDecode() */ | |
59 | SizeT dicBufSize; /* size of dic in bytes */ | |
60 | SizeT dicPos; /* current position in dic; the caller must update it manually when it reaches dicBufSize */ | |
61 | const Byte *buf; /* NOTE(review): presumably the current input pointer -- confirm */ | |
62 | UInt32 range; /* NOTE(review): range/code look like range-coder state -- confirm */ | |
63 | UInt32 code; | |
64 | UInt32 processedPos; | |
65 | UInt32 checkDicSize; | |
66 | UInt32 reps[4]; | |
67 | UInt32 state; | |
68 | UInt32 remainLen; | |
69 | ||
70 | UInt32 numProbs; /* NOTE(review): presumably the number of entries allocated in probs -- confirm */ | |
71 | unsigned tempBufSize; /* NOTE(review): presumably the number of valid bytes in tempBuf -- confirm */ | |
72 | Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; /* sized for the worst-case number of required input bytes */ | |
73 | } CLzmaDec; | |
74 | ||
75 | #define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } | |
76 | ||
77 | void LzmaDec_Init(CLzmaDec *p); | |
78 | ||
79 | /* There are two types of LZMA streams: | |
80 | - A stream with an end mark. The end mark adds about 6 bytes to the compressed size. | |
81 | - A stream without an end mark. You must know the exact uncompressed size to decompress such a stream. */ | |
82 | ||
83 | typedef enum | |
84 | { | |
85 | LZMA_FINISH_ANY, /* finish at any point */ | |
86 | LZMA_FINISH_END /* block must be finished at the end */ | |
87 | } ELzmaFinishMode; | |
88 | ||
89 | /* ELzmaFinishMode is meaningful only if decoding reaches the output limit! | |
90 | ||
91 | You must use LZMA_FINISH_END when you know that the current output buffer | |
92 | covers the last bytes of the block. In all other cases you must use LZMA_FINISH_ANY. | |
93 | ||
94 | If the LZMA decoder sees the end marker before reaching the output limit, it returns SZ_OK, | |
95 | and the output value of destLen will be less than the output buffer size limit. | |
96 | You can also check the status result. | |
97 | ||
98 | You can use multiple checks to test data integrity after full decompression: | |
99 | 1) Check Result and "status" variable. | |
100 | 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. | |
101 | 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. | |
102 | You must use correct finish mode in that case. */ | |
103 | ||
104 | typedef enum | |
105 | { | |
106 | LZMA_STATUS_NOT_SPECIFIED, /* no status available; use the main error code instead */ | |
107 | LZMA_STATUS_FINISHED_WITH_MARK, /* the stream finished with an end mark */ | |
108 | LZMA_STATUS_NOT_FINISHED, /* the stream has not finished */ | |
109 | LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes; LzmaDecode() reports this as SZ_ERROR_INPUT_EOF */ | |
110 | LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* the stream may have finished without an end mark */ | |
111 | } ELzmaStatus; | |
112 | ||
113 | /* ELzmaStatus is used only as output value for function call */ | |
114 | ||
115 | ||
116 | /* ---------- Interfaces ---------- */ | |
117 | ||
118 | /* There are 3 levels of interfaces: | |
119 | 1) Dictionary Interface | |
120 | 2) Buffer Interface | |
121 | 3) One Call Interface | |
122 | You can select any of these interfaces, but don't mix functions from different | |
123 | groups for same object. */ | |
124 | ||
125 | ||
126 | /* There are two variants to allocate state for Dictionary Interface: | |
127 | 1) LzmaDec_Allocate / LzmaDec_Free | |
128 | 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs | |
129 | You can use variant 2, if you set dictionary buffer manually. | |
130 | For Buffer Interface you must always use variant 1. | |
131 | ||
132 | LzmaDec_Allocate* can return: | |
133 | SZ_OK | |
134 | SZ_ERROR_MEM - Memory allocation error | |
135 | SZ_ERROR_UNSUPPORTED - Unsupported properties | |
136 | */ | |
137 | ||
138 | SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); | |
139 | void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); | |
140 | ||
141 | SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); | |
142 | void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); | |
143 | ||
144 | /* ---------- Dictionary Interface ---------- */ | |
145 | ||
146 | /* You can use it, if you want to eliminate the overhead for data copying from | |
147 | dictionary to some other external buffer. | |
148 | You must work with CLzmaDec variables directly in this interface. | |
149 | ||
150 | STEPS: | |
151 | LzmaDec_Construct() | |
152 | LzmaDec_Allocate() | |
153 | for (each new stream) | |
154 | { | |
155 | LzmaDec_Init() | |
156 | while (it needs more decompression) | |
157 | { | |
158 | LzmaDec_DecodeToDic() | |
159 | use data from CLzmaDec::dic and update CLzmaDec::dicPos | |
160 | } | |
161 | } | |
162 | LzmaDec_Free() | |
163 | */ | |
164 | ||
165 | /* LzmaDec_DecodeToDic | |
166 | ||
167 | The decoding to internal dictionary buffer (CLzmaDec::dic). | |
168 | You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! | |
169 | ||
170 | finishMode: | |
171 | It has meaning only if the decoding reaches output limit (dicLimit). | |
172 | LZMA_FINISH_ANY - Decode just dicLimit bytes. | |
173 | LZMA_FINISH_END - Stream must be finished after dicLimit. | |
174 | ||
175 | Returns: | |
176 | SZ_OK | |
177 | status: | |
178 | LZMA_STATUS_FINISHED_WITH_MARK | |
179 | LZMA_STATUS_NOT_FINISHED | |
180 | LZMA_STATUS_NEEDS_MORE_INPUT | |
181 | LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK | |
182 | SZ_ERROR_DATA - Data error | |
183 | */ | |
184 | ||
185 | SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, | |
186 | const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit); | |
187 | ||
188 | ||
189 | /* ---------- Buffer Interface ---------- */ | |
190 | ||
191 | /* It's zlib-like interface. | |
192 | See LzmaDec_DecodeToDic description for information about STEPS and return results, | |
193 | but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need | |
194 | to work with CLzmaDec variables manually. | |
195 | ||
196 | finishMode: | |
197 | It has meaning only if the decoding reaches output limit (*destLen). | |
198 | LZMA_FINISH_ANY - Decode just destLen bytes. | |
199 | LZMA_FINISH_END - Stream must be finished after (*destLen). | |
200 | */ | |
201 | ||
202 | SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, | |
203 | const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status, SizeT memlimit); | |
204 | ||
205 | ||
206 | /* ---------- One Call Interface ---------- */ | |
207 | ||
208 | /* LzmaDecode | |
209 | ||
210 | finishMode: | |
211 | It has meaning only if the decoding reaches output limit (*destLen). | |
212 | LZMA_FINISH_ANY - Decode just destLen bytes. | |
213 | LZMA_FINISH_END - Stream must be finished after (*destLen). | |
214 | ||
215 | Returns: | |
216 | SZ_OK | |
217 | status: | |
218 | LZMA_STATUS_FINISHED_WITH_MARK | |
219 | LZMA_STATUS_NOT_FINISHED | |
220 | LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK | |
221 | SZ_ERROR_DATA - Data error | |
222 | SZ_ERROR_MEM - Memory allocation error | |
223 | SZ_ERROR_UNSUPPORTED - Unsupported properties | |
224 | SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). | |
225 | */ | |
226 | ||
227 | SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, | |
228 | const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, | |
229 | ELzmaStatus *status, ISzAllocPtr alloc); | |
230 | ||
231 | EXTERN_C_END | |
232 | ||
233 | #endif |
0 | ||
# Headers installed under $(library_includedir).
h_sources = LzmaDec.h 7zTypes.h

# Headers that are compiled against but not installed.
h_sources_private = LzFind.h LzHash.h Compiler.h Precomp.h

c_sources = LzFind.c LzmaDec.c

# Each warning flag is listed once (-Wextra used to appear twice).
AM_CFLAGS = -I$(top_srcdir) -D_GNU_SOURCE -g -Wall -Wextra -std=gnu99 -pedantic \
	-Wno-missing-field-initializers -Wshadow -Wpointer-arith \
	-Wstrict-prototypes -Wmissing-prototypes -Wno-unused-parameter

library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)/lzma
library_include_HEADERS = $(h_sources)

# Convenience library: built but not installed on its own.
noinst_LTLIBRARIES = liblzma-c.la
liblzma_c_la_SOURCES = $(h_sources) $(h_sources_private) $(c_sources)
0 | /* Precomp.h -- StdAfx | |
1 | 2013-11-12 : Igor Pavlov : Public domain */ | |
2 | ||
3 | #ifndef __7Z_PRECOMP_H | |
4 | #define __7Z_PRECOMP_H | |
5 | ||
6 | #include "Compiler.h" | |
7 | /* #include "7zTypes.h" */ | |
8 | ||
9 | #endif |
7 | 7 | Server: Apache |
8 | 8 | Connection: close |
9 | 9 | Content-Type: text/html |
10 | Content-Length: 12 | |
10 | 11 | Content-Length: 11 |
11 | Content-Length: 12 | |
12 | 12 | |
13 | 13 | Hello World!⏎ |
0 | >>> | |
1 | POST / HTTP/1.1 | |
2 | Host: localhost | |
3 | Content-Type: application/x-www-form-urlencoded | |
4 | ||
5 | login=foo&password=bar | |
6 | <<< | |
7 | HTTP/1.1 200 OK | |
8 | Content-Length: 0 | |
9 | ||
10 | ||
11 | >>> | |
12 | GET / HTTP/1.1 | |
13 | Host: localhost | |
14 |
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
37 | 37 | static int HTPCallbackRequestHeaderData(htp_tx_data_t *tx_data) |
38 | 38 | { |
39 | 39 | fprintf(logfile, "HTPCallbackRequestHeaderData %"PRIuMAX"\n", (uintmax_t)tx_data->len); |
40 | if (tx_data->len > 0) { | |
41 | fprintf(logfile, "HTPCallbackRequestHeaderData %x %x\n", tx_data->data[0], tx_data->data[(uintmax_t)tx_data->len-1]); | |
42 | } | |
40 | 43 | return 0; |
41 | 44 | } |
42 | 45 | |
43 | 46 | static int HTPCallbackResponseHeaderData(htp_tx_data_t *tx_data) |
44 | 47 | { |
45 | 48 | fprintf(logfile, "HTPCallbackResponseHeaderData %"PRIuMAX"\n", (uintmax_t)tx_data->len); |
49 | if (tx_data->len > 0) { | |
50 | fprintf(logfile, "HTPCallbackResponseHeaderData %x %x\n", tx_data->data[0], tx_data->data[(uintmax_t)tx_data->len-1]); | |
51 | } | |
46 | 52 | return 0; |
47 | 53 | } |
48 | 54 | |
61 | 67 | static int HTPCallbackRequestBodyData(htp_tx_data_t *tx_data) |
62 | 68 | { |
63 | 69 | fprintf(logfile, "HTPCallbackRequestBodyData %"PRIuMAX"\n", (uintmax_t)tx_data->len); |
70 | if (tx_data->len > 0) { | |
71 | fprintf(logfile, "HTPCallbackRequestBodyData %x %x\n", tx_data->data[0], tx_data->data[(uintmax_t)tx_data->len-1]); | |
72 | } | |
64 | 73 | return 0; |
65 | 74 | } |
66 | 75 | |
67 | 76 | static int HTPCallbackResponseBodyData(htp_tx_data_t *tx_data) |
68 | 77 | { |
69 | 78 | fprintf(logfile, "HTPCallbackResponseBodyData %"PRIuMAX"\n", (uintmax_t)tx_data->len); |
79 | if (tx_data->len > 0) { | |
80 | fprintf(logfile, "HTPCallbackResponseBodyData %x %x\n", tx_data->data[0], tx_data->data[(uintmax_t)tx_data->len-1]); | |
81 | } | |
70 | 82 | return 0; |
71 | 83 | } |
72 | 84 | |
219 | 231 | htp_connp_res_data(connp, NULL, out_data + out_data_offset, out_data_len - out_data_offset); |
220 | 232 | } |
221 | 233 | |
234 | htp_connp_close(connp, NULL); | |
222 | 235 | htp_connp_destroy_all(connp); |
223 | 236 | // Destroy LibHTP configuration |
224 | 237 | htp_config_destroy(cfg); |
"""Transforms a pcap into a test file for libhtp.

Usage:
    tshark -Tfields -e tcp.dstport -e tcp.payload -r input.pcap > input.txt
    python pcaptohtp.py input.txt > input.t
"""
import binascii
import sys


def convert(lines, out):
    """Write one libhtp test-file chunk per "<dstport> <hex payload>" line.

    lines -- iterable of text lines as produced by the tshark command above.
    out   -- binary stream that receives the test file.

    Lines that do not split into exactly two whitespace-separated fields are
    skipped.  A destination port of "80" marks a request (">>>"); anything
    else is treated as a response ("<<<").  Colons in the hex payload are
    ignored.
    """
    for line in lines:
        fields = line.split()
        if len(fields) != 2:
            continue
        port, payload_hex = fields
        # determine request or response based on port
        out.write(b">>>\n" if port == "80" else b"<<<\n")
        # Write the raw payload bytes; print() on Python 3 would emit the
        # b'...' repr instead of the data itself.
        out.write(binascii.unhexlify(payload_hex.replace(":", "")))
        out.write(b"\n")


def main(argv):
    """Convert the tshark dump named by argv[1] and write it to stdout."""
    # Python 3 exposes the byte stream as sys.stdout.buffer; Python 2's
    # sys.stdout already accepts bytes.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    with open(argv[1]) as dump:
        convert(dump, out)
    out.flush()


if __name__ == "__main__":
    main(sys.argv)
1008 | 1008 | ASSERT_TRUE(tx->request_auth_password == NULL); |
1009 | 1009 | } |
1010 | 1010 | |
1011 | TEST_F(ConnectionParsing, Http_0_9_MethodOnly) { | |
1012 | int rc = test_run(home, "42-http_0_9-method_only.t", cfg, &connp); | |
1011 | TEST_F(ConnectionParsing, Unknown_MethodOnly) { | |
1012 | int rc = test_run(home, "42-unknown-method_only.t", cfg, &connp); | |
1013 | 1013 | ASSERT_GE(rc, 0); |
1014 | 1014 | |
1015 | 1015 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); |
1667 | 1667 | |
1668 | 1668 | TEST_F(ConnectionParsing, ResponseMultipleClMismatch) { |
1669 | 1669 | int rc = test_run(home, "88-response-multiple-cl-mismatch.t", cfg, &connp); |
1670 | ASSERT_LT(rc, 0); // Expect error | |
1671 | ||
1672 | ASSERT_EQ(1, htp_list_size(connp->conn->transactions)); | |
1673 | ||
1674 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
1675 | ASSERT_TRUE(tx != NULL); | |
1676 | ||
1677 | ASSERT_EQ(HTP_REQUEST_COMPLETE, tx->request_progress); | |
1678 | ASSERT_EQ(HTP_RESPONSE_HEADERS, tx->response_progress); | |
1670 | ASSERT_GE(rc, 0); | |
1671 | ||
1672 | ASSERT_EQ(1, htp_list_size(connp->conn->transactions)); | |
1673 | ||
1674 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
1675 | ASSERT_TRUE(tx != NULL); | |
1676 | ||
1677 | ASSERT_EQ(HTP_REQUEST_COMPLETE, tx->request_progress); | |
1678 | ASSERT_EQ(HTP_RESPONSE_COMPLETE, tx->response_progress); | |
1679 | ||
1680 | ASSERT_TRUE(tx->flags & HTP_REQUEST_SMUGGLING); | |
1681 | ||
1682 | htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->response_headers, "Content-Length"); | |
1683 | ASSERT_TRUE(h != NULL); | |
1684 | ASSERT_TRUE(h->value != NULL); | |
1685 | ASSERT_TRUE(h->flags & HTP_FIELD_REPEATED); | |
1686 | ||
1687 | ASSERT_EQ(0, bstr_cmp_c(h->value, "12")); | |
1688 | ||
1689 | ASSERT_EQ(2, htp_list_size(tx->conn->messages)); | |
1690 | htp_log_t *log = (htp_log_t *) htp_list_get(tx->conn->messages, 1); | |
1691 | ASSERT_TRUE(log != NULL); | |
1692 | ASSERT_EQ(0, strcmp(log->msg, "Ambiguous response C-L value")); | |
1693 | ASSERT_EQ(HTP_LOG_WARNING, log->level); | |
1679 | 1694 | } |
1680 | 1695 | |
1681 | 1696 | TEST_F(ConnectionParsing, ResponseInvalidCl) { |
1682 | 1697 | int rc = test_run(home, "75-response-invalid-cl.t", cfg, &connp); |
1683 | ASSERT_LT(rc, 0); // Expect error. | |
1684 | ||
1685 | ASSERT_EQ(1, htp_list_size(connp->conn->transactions)); | |
1686 | ||
1687 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
1688 | ASSERT_TRUE(tx != NULL); | |
1689 | ||
1690 | ASSERT_EQ(HTP_REQUEST_COMPLETE, tx->request_progress); | |
1691 | ASSERT_EQ(HTP_RESPONSE_HEADERS, tx->response_progress); | |
1698 | ASSERT_GE(rc, 0); | |
1699 | ||
1700 | ASSERT_EQ(1, htp_list_size(connp->conn->transactions)); | |
1701 | ||
1702 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
1703 | ASSERT_TRUE(tx != NULL); | |
1704 | ||
1705 | ASSERT_EQ(HTP_REQUEST_COMPLETE, tx->request_progress); | |
1706 | ASSERT_EQ(HTP_RESPONSE_COMPLETE, tx->response_progress); | |
1692 | 1707 | |
1693 | 1708 | ASSERT_FALSE(tx->flags & HTP_REQUEST_SMUGGLING); |
1694 | 1709 | } |
1913 | 1928 | ASSERT_EQ(HTP_REQUEST_COMPLETE, tx->request_progress); |
1914 | 1929 | ASSERT_EQ(HTP_RESPONSE_COMPLETE, tx->response_progress); |
1915 | 1930 | } |
1931 | ||
1932 | TEST_F(ConnectionParsing, RequestInvalid) { | |
1933 | int rc = test_run(home, "91-request-unexpected-body.t", cfg, &connp); | |
1934 | ASSERT_GE(rc, 0); | |
1935 | ||
1936 | ASSERT_EQ(2, htp_list_size(connp->conn->transactions)); | |
1937 | ||
1938 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
1939 | ASSERT_TRUE(tx != NULL); | |
1940 | ASSERT_EQ(0, bstr_cmp_c(tx->request_method, "POST")); | |
1941 | ASSERT_EQ(HTP_REQUEST_COMPLETE, tx->request_progress); | |
1942 | ASSERT_EQ(HTP_RESPONSE_COMPLETE, tx->response_progress); | |
1943 | ||
1944 | tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 1); | |
1945 | ASSERT_TRUE(tx != NULL); | |
1946 | ASSERT_EQ(0, bstr_cmp_c(tx->request_method, "GET")); | |
1947 | ASSERT_EQ(HTP_REQUEST_COMPLETE, tx->request_progress); | |
1948 | ASSERT_EQ(HTP_RESPONSE_NOT_STARTED, tx->response_progress); | |
1949 | } | |
1950 | ||
1951 | TEST_F(ConnectionParsing, Http_0_9_MethodOnly) { | |
1952 | int rc = test_run(home, "92-http_0_9-method_only.t", cfg, &connp); | |
1953 | ASSERT_GE(rc, 0); | |
1954 | ||
1955 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
1956 | ASSERT_TRUE(tx != NULL); | |
1957 | ||
1958 | ASSERT_EQ(HTP_REQUEST_COMPLETE, tx->request_progress); | |
1959 | ||
1960 | ASSERT_TRUE(tx->request_method != NULL); | |
1961 | ASSERT_EQ(0, bstr_cmp_c(tx->request_method, "GET")); | |
1962 | ||
1963 | ASSERT_EQ(0, bstr_cmp_c(tx->request_uri, "/")); | |
1964 | ||
1965 | ASSERT_EQ(1, tx->is_protocol_0_9); | |
1966 | } | |
1967 | ||
1968 | TEST_F(ConnectionParsing, CompressedResponseDeflateAsGzip) { | |
1969 | int rc = test_run(home, "93-compressed-response-deflateasgzip.t", cfg, &connp); | |
1970 | ASSERT_GE(rc, 0); | |
1971 | ||
1972 | ASSERT_EQ(1, htp_list_size(connp->conn->transactions)); | |
1973 | ||
1974 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
1975 | ASSERT_TRUE(tx != NULL); | |
1976 | ||
1977 | ASSERT_TRUE(htp_tx_is_complete(tx)); | |
1978 | ||
1979 | ASSERT_EQ(755, tx->response_message_len); | |
1980 | ||
1981 | ASSERT_EQ(1433, tx->response_entity_len); | |
1982 | } | |
1983 | ||
1984 | TEST_F(ConnectionParsing, CompressedResponseMultiple) { | |
1985 | int rc = test_run(home, "94-compressed-response-multiple.t", cfg, &connp); | |
1986 | ASSERT_GE(rc, 0); | |
1987 | ||
1988 | ASSERT_EQ(1, htp_list_size(connp->conn->transactions)); | |
1989 | ||
1990 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
1991 | ASSERT_TRUE(tx != NULL); | |
1992 | ||
1993 | ASSERT_TRUE(htp_tx_is_complete(tx)); | |
1994 | ||
1995 | ASSERT_EQ(51, tx->response_message_len); | |
1996 | ||
1997 | ASSERT_EQ(25, tx->response_entity_len); | |
1998 | } | |
1999 | ||
2000 | TEST_F(ConnectionParsing, CompressedResponseGzipAsDeflate) { | |
2001 | int rc = test_run(home, "95-compressed-response-gzipasdeflate.t", cfg, &connp); | |
2002 | ASSERT_GE(rc, 0); | |
2003 | ||
2004 | ASSERT_EQ(1, htp_list_size(connp->conn->transactions)); | |
2005 | ||
2006 | htp_tx_t *tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0); | |
2007 | ASSERT_TRUE(tx != NULL); | |
2008 | ||
2009 | ASSERT_TRUE(htp_tx_is_complete(tx)); | |
2010 | ||
2011 | ASSERT_EQ(187, tx->response_message_len); | |
2012 | ||
2013 | ASSERT_EQ(225, tx->response_entity_len); | |
2014 | } | |
2015 | ||
#ifdef HAVE_LIBLZMA
// Only compiled when HAVE_LIBLZMA is defined.  Replays
// 96-compressed-response-lzma.t and checks that the single transaction
// completes with the expected response length counters.
TEST_F(ConnectionParsing, CompressedResponseLzma) {
    const int status = test_run(home, "96-compressed-response-lzma.t", cfg, &connp);
    ASSERT_GE(status, 0);

    ASSERT_EQ(1, htp_list_size(connp->conn->transactions));

    htp_tx_t *const only_tx = (htp_tx_t *) htp_list_get(connp->conn->transactions, 0);
    ASSERT_TRUE(only_tx != NULL);
    ASSERT_TRUE(htp_tx_is_complete(only_tx));

    // Length counters recorded for this fixture.
    ASSERT_EQ(90, only_tx->response_message_len);
    ASSERT_EQ(68, only_tx->response_entity_len);
}
#endif