Codebase list libisal / 9968e7a
Change gen cust hufftables to accept dictionary Change-Id: I4eed03bdb91030b16b3ecfd8076adc890e4f59a2 Signed-off-by: Greg Tucker <greg.b.tucker@intel.com> Greg Tucker 3 years ago
1 changed file(s) with 152 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
6363 #include <stdlib.h>
6464 #include "igzip_lib.h"
6565
66 #include "huff_codes.h"
67 #include "huffman.h"
68
6669 /*These max code lengths are limited by how the data is stored in
6770 * hufftables.asm. The deflate standard max is 15.*/
6871
232235 fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE);
233236 }
234237
/**
 * @brief Returns the deflate symbol value for a look back distance.
 *
 * Distances 1 through 4 map directly onto symbols 0 through 3. Larger
 * distances are grouped: the group is selected from the position of the
 * highest set bit of (dist - 1), as reported by bsr(), and the next lower
 * bit picks one of the two symbols in that group.
 *
 * NOTE(review): bsr() is defined outside this block; the arithmetic below
 * presumes it returns the 1-based index of the most significant set bit --
 * confirm against its definition.
 *
 * @param dist Look back distance, expected to be in the range 1..32768.
 * @return Distance symbol, or ~0 when dist exceeds 32768 (reachable only
 *         when asserts are compiled out).
 */
static uint32_t convert_dist_to_dist_sym(uint32_t dist)
{
	uint32_t shift = 0;

	assert(dist <= 32768 && dist > 0);

	/* Out of range input: report an invalid symbol. */
	if (dist > 32768)
		return ~0;

	/* Distances up to 4 have no extra bits, so no shift is applied. */
	if (dist > 4)
		shift = bsr(dist - 1) - 2;

	return (shift * 2) + ((dist - 1) >> shift);
}
248
/**
 * @brief Returns the deflate symbol value for a repeat length.
 *
 * Implements the length code table on page 11 of RFC 1951: lengths 3..257
 * fall into six groups whose symbols each cover 1, 2, 4, 8, 16 or 32
 * consecutive lengths, and length 258 has its own symbol, 285.
 *
 * @param length Repeat length, expected to be in the range 3..258.
 * @return Literal/length symbol in the range 257..285 for valid input.
 */
static uint32_t convert_length_to_len_sym(uint32_t length)
{
	/* One row per group of the RFC 1951 length table:
	 *   limit - first length that no longer belongs to the group
	 *   base  - symbol offset added to the scaled length
	 *   shift - log2 of the number of lengths sharing one symbol */
	static const struct {
		uint32_t limit;
		uint32_t base;
		uint32_t shift;
	} groups[] = {
		{11, 257, 0},
		{19, 261, 1},
		{35, 265, 2},
		{67, 269, 3},
		{131, 273, 4},
		{258, 277, 5},
	};
	unsigned int i;

	assert(length > 2 && length < 259);

	for (i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
		if (length < groups[i].limit)
			return groups[i].base + ((length - 3) >> groups[i].shift);
	}

	/* Only the maximum length is left. */
	return 285;
}
272
273 void isal_update_histogram_dict(uint8_t * start_stream, int dict_length, int length,
274 struct isal_huff_histogram *histogram)
275 {
276 uint32_t literal = 0, hash;
277 uint16_t seen, *last_seen = histogram->hash_table;
278 uint8_t *current, *end_stream, *next_hash, *end, *end_dict;
279 uint32_t match_length;
280 uint32_t dist;
281 uint64_t *lit_len_histogram = histogram->lit_len_histogram;
282 uint64_t *dist_histogram = histogram->dist_histogram;
283
284 if (length <= 0)
285 return;
286
287 end_stream = start_stream + dict_length + length;
288 end_dict = start_stream + dict_length;
289
290 memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */
291
292 for (current = start_stream; current < end_dict - 4; current++) {
293 literal = load_u32(current);
294 hash = compute_hash(literal) & LVL0_HASH_MASK;
295 last_seen[hash] = (current - start_stream) & 0xFFFF;
296 }
297
298 for (current = start_stream + dict_length; current < end_stream - 3; current++) {
299 literal = load_u32(current);
300 hash = compute_hash(literal) & LVL0_HASH_MASK;
301 seen = last_seen[hash];
302 last_seen[hash] = (current - start_stream) & 0xFFFF;
303 dist = (current - start_stream - seen) & 0xFFFF;
304 if (dist - 1 < D - 1) {
305 assert(start_stream <= current - dist);
306 match_length =
307 compare258(current - dist, current, end_stream - current);
308 if (match_length >= SHORTEST_MATCH) {
309 next_hash = current;
310 #ifdef ISAL_LIMIT_HASH_UPDATE
311 end = next_hash + 3;
312 #else
313 end = next_hash + match_length;
314 #endif
315 if (end > end_stream - 3)
316 end = end_stream - 3;
317 next_hash++;
318 for (; next_hash < end; next_hash++) {
319 literal = load_u32(next_hash);
320 hash = compute_hash(literal) & LVL0_HASH_MASK;
321 last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
322 }
323
324 dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
325 lit_len_histogram[convert_length_to_len_sym(match_length)] +=
326 1;
327 current += match_length - 1;
328 continue;
329 }
330 }
331 lit_len_histogram[literal & 0xFF] += 1;
332 }
333
334 for (; current < end_stream; current++)
335 lit_len_histogram[*current] += 1;
336
337 lit_len_histogram[256] += 1;
338 return;
339 }
340
235341 int main(int argc, char *argv[])
236342 {
237343 long int file_length;
344 int argi = 1;
238345 uint8_t *stream = NULL;
239346 struct isal_hufftables hufftables;
240347 struct isal_huff_histogram histogram;
241348 struct isal_zstream tmp_stream;
242 FILE *file;
349 FILE *file = NULL;
350 FILE *dict_file = NULL;
351 long int dict_file_length = 0;
352 uint8_t *dict_stream = NULL;
243353
244354 if (argc == 1) {
245355 printf("Error, no input file.\n");
246356 return 1;
247357 }
248358
359 if (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'd') {
360 dict_file = fopen(argv[2], "r");
361
362 fseek(dict_file, 0, SEEK_END);
363 dict_file_length = ftell(dict_file);
364 fseek(dict_file, 0, SEEK_SET);
365 dict_file_length -= ftell(dict_file);
366 dict_stream = malloc(dict_file_length);
367 if (dict_stream == NULL) {
368 printf("Failed to allocate memory to read in dictionary file\n");
369 fclose(dict_file);
370 return 1;
371 }
372 if (fread(dict_stream, 1, dict_file_length, dict_file) != dict_file_length) {
373 printf("Error occurred when reading dictionary file");
374 fclose(dict_file);
375 free(dict_stream);
376 return 1;
377 }
378 isal_update_histogram(dict_stream, dict_file_length, &histogram);
379
380 printf("Read %ld bytes of dictionary file %s\n", dict_file_length, argv[2]);
381 argi += 2;
382 fclose(dict_file);
383 free(dict_stream);
384 }
385
249386 memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */
250387
251 while (argc > 1) {
252 printf("Processing %s\n", argv[argc - 1]);
253 file = fopen(argv[argc - 1], "r");
388 while (argi < argc) {
389 printf("Processing %s\n", argv[argi]);
390 file = fopen(argv[argi], "r");
254391 if (file == NULL) {
255392 printf("Error opening file\n");
256393 return 1;
259396 file_length = ftell(file);
260397 fseek(file, 0, SEEK_SET);
261398 file_length -= ftell(file);
262 stream = malloc(file_length);
399 stream = malloc(file_length + dict_file_length);
263400 if (stream == NULL) {
264401 printf("Failed to allocate memory to read in file\n");
265402 fclose(file);
266403 return 1;
267404 }
268 if (fread(stream, 1, file_length, file) != file_length) {
405 if (dict_file_length > 0)
406 memcpy(stream, dict_stream, dict_file_length);
407
408 if (fread(&stream[dict_file_length], 1, file_length, file) != file_length) {
269409 printf("Error occurred when reading file");
270410 fclose(file);
271411 free(stream);
274414
275415 /* Create a histogram of frequency of symbols found in stream to
276416 * generate the huffman tree.*/
277 isal_update_histogram(stream, file_length, &histogram);
417 if (0 == dict_file_length)
418 isal_update_histogram(stream, file_length, &histogram);
419 else
420 isal_update_histogram_dict(stream, dict_file_length, file_length,
421 &histogram);
278422
279423 fclose(file);
280424 free(stream);
281 argc--;
425 argi++;
282426 }
283427
284428 isal_create_hufftables(&hufftables, &histogram);