Codebase list hardlink / 7408197
hardlink: Handle extended attributes. This change allows the user to specify that extended attributes are to be taken into account when hardlink looks for "equal" files to link together. The new command-line option --respect-xattrs (or -X) means that for any pair of files to be considered "equal", they must have exactly the same set of attribute names, and exactly the same values assigned to each of those corresponding attributes (in addition to all other criteria). Xattr equality is the penultimate criterion that is tested for a given pair of files, just before the final test for content equality, since those two are usually the most expensive criteria to evaluate and xattr equality is usually cheaper than content equality. The implementation avoids O(N^2) time by sorting the two sets of attribute names (N*log(N)), before testing their equality and the equality of their values (O(N)); and it always bails out as early as possible (e.g. even before sorting if the total lengths of the xattr names are not equal). This also includes a test script in test/test_hardlink.sh which creates a bunch of files with various combinations of xattrs, lists them with their inode number, then runs hardlink on them, and then lists them with inodes again. Since the name of each file corresponds to its xattrs, you can easily see if expected linking is indeed happening. Issue: STPK-1902 Change-Id: I0a24924c7f5ab55cd9f4c7e0e24df1b127ef50c0 Signed-off-by: Tom Keel <thomas.keel@intel.com> Tom Keel authored 10 years ago Julian Andres Klode committed 9 years ago
5 changed file(s) with 335 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
2626 MYCC = $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH)
2727
2828 # Features to test for when creating configure.h
29 FEATURES := GETOPT_LONG POSIX_FADVISE $(ENABLE)
29 FEATURES := GETOPT_LONG POSIX_FADVISE XATTR $(ENABLE)
3030
3131 all: hardlink
3232
6060 regcomp(&preg, "regex", 0);
6161 }
6262
63 #elif TEST_XATTR
64
65 #include <sys/xattr.h>
66
/* Feature probe for XATTR: only needs to compile and link against llistxattr(). */
int main(void)
{
    (void) llistxattr(0, 0, 0);
    return 0;
}
71
6372 #else
6473
6574 #error "Invalid feature"
3535 Link/compare files even if their time of modification is different. This will
3636 retain the newest timestamp, unless \-m or \-M is given.
3737 .TP
38 .B \-X or \-\-respect\-xattrs
39 Only try to link files with the same extended attributes.
40 .TP
3841 .B \-m or \-\-maximize
3942 Try to maximize the link count of the files.
4043 .TP
8181 #define REG_NOSUB 0 /* we do want backreferences in PCRE mode */
8282 #else
8383 #include <regex.h> /* regcomp(), regsearch() */
84 #endif
85
86 #ifdef HAVE_XATTR
87 #include <attr/xattr.h> /* listxattr, getxattr */
8488 #endif
8589
8690 /**
135139 * @started: Whether we are post command-line processing
136140 * @files: The number of files worked on
137141 * @linked: The number of files replaced by a hardlink to a master
142 * @xattr_comparisons: The number of extended attribute comparisons
138143 * @comparisons: The number of comparisons
139144 * @saved: The (exaggerated) amount of space saved
140145 * @start_time: The time we started at, in seconds since some unspecified point
143148 hl_bool started;
144149 size_t files;
145150 size_t linked;
151 size_t xattr_comparisons;
146152 size_t comparisons;
147153 double saved;
148154 double start_time;
157163 * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE)
158164 * @respect_name: Whether to respect file names (default = FALSE)
159165 * @respect_time: Whether to respect file modification times (default = TRUE)
166 * @respect_xattrs: Whether to respect extended attributes (default = FALSE)
160167 * @maximise: Chose the file with the highest link count as master
161168 * @minimise: Chose the file with the lowest link count as master
162169 * @dry_run: Specifies whether hardlink should not link files (default = FALSE)
171178 unsigned int respect_owner:1;
172179 unsigned int respect_name:1;
173180 unsigned int respect_time:1;
181 unsigned int respect_xattrs:1;
174182 unsigned int maximise:1;
175183 unsigned int minimise:1;
176184 unsigned int dry_run:1;
301309
302310 return diff;
303311 }
312
304313 /**
305314 * compare_nodes_ino - Node comparison function
306315 * @_a: The first node (a #struct file)
337346 jlog(JLOG_SUMMARY, "Mode: %s", opts.dry_run ? "dry-run" : "real");
338347 jlog(JLOG_SUMMARY, "Files: %zu", stats.files);
339348 jlog(JLOG_SUMMARY, "Linked: %zu files", stats.linked);
349 #ifdef HAVE_XATTR
350 jlog(JLOG_SUMMARY, "Compared: %zu xattrs", stats.xattr_comparisons);
351 #endif
340352 jlog(JLOG_SUMMARY, "Compared: %zu files", stats.comparisons);
341353 jlog(JLOG_SUMMARY, "Saved: %s", format(stats.saved));
342354 jlog(JLOG_SUMMARY, "Duration: %.2f seconds", gettime() - stats.start_time);
362374 last_signal = 0;
363375 return FALSE;
364376 }
377
378 #ifdef HAVE_XATTR
379
380 /**
381 * malloc_or_die -- Wrapper for malloc()
382 *
383 * This does the same thing as malloc() except that it aborts if memory
384 * can't be allocated.
385 */
386 static void *malloc_or_die(size_t size)
387 {
388 void *mem = malloc(size);
389
390 if (!mem) {
391 jlog(JLOG_SYSFAT, "Cannot allocate memory");
392 exit(1);
393 }
394 return mem;
395 }
396
397 /**
398 * llistxattr_or_die - Wrapper for llistxattr()
399 *
400 * This does the same thing as llistxattr() except that it aborts if any error
401 * other than "not supported" is detected.
402 */
403 static ssize_t llistxattr_or_die(const char *path, char *list, size_t size)
404 {
405 ssize_t len = llistxattr(path, list, size);
406
407 if (len < 0 && errno != ENOTSUP) {
408 jlog(JLOG_SYSFAT, "Cannot get xattr names for %s", path);
409 exit(1);
410 }
411 return len;
412 }
413
414 /**
415 * lgetxattr_or_die - Wrapper for lgetxattr()
416 *
417 * This does the same thing as lgetxattr() except that it aborts upon error.
418 */
419 static ssize_t lgetxattr_or_die(const char *path, const char *name, void *value,
420 size_t size)
421 {
422 ssize_t len = lgetxattr(path, name, value, size);
423
424 if (len < 0) {
425 jlog(JLOG_SYSFAT, "Cannot get xattr value of %s for %s", name, path);
426 exit(1);
427 }
428 return len;
429 }
430
/**
 * get_xattr_name_count - Count the number of xattr names
 * @names: a non-empty table of concatenated, null-terminated xattr names
 * @len: the total length of the table, terminators included
 *
 * Hops from one name to the next by skipping each string together with its
 * terminating null byte until @len bytes have been consumed.
 *
 * @Returns the number of xattr names
 */
static int get_xattr_name_count(const char *const names, ssize_t len)
{
    ssize_t offset = 0;
    int total = 0;

    while (offset < len) {
        offset += strlen(names + offset) + 1;
        total++;
    }

    return total;
}
448
/**
 * cmp_xattr_name_ptrs - qsort() comparator for a table of xattr name pointers
 * @ptr1: address of the first table slot (a pointer to a name)
 * @ptr2: address of the second table slot (a pointer to a name)
 *
 * @Returns the strcmp() ordering of the two names the slots point to
 */
static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2)
{
    char *const *lhs = ptr1;
    char *const *rhs = ptr2;

    return strcmp(*lhs, *rhs);
}
457
458 /**
459 * get_sorted_xattr_name_table - Create a sorted table of xattr names.
460 * @names - table of concatentated, null-terminated xattr names
461 * @n - the number of names
462 *
463 * @Returns allocated table of pointers to the names, sorted alphabetically
464 */
465 static const char **get_sorted_xattr_name_table(const char *names, int n)
466 {
467 const char **table = malloc_or_die(n * sizeof(char *));
468 int i;
469
470 for (i = 0; i < n; i++) {
471 table[i] = names;
472 names += strlen(names) + 1;
473 }
474
475 qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs);
476
477 return table;
478 }
479
480 /**
481 * file_xattrs_equal - Compare the extended attributes of two files
482 * @a: The first file
483 * @b: The second file
484 *
485 * @Returns: %TRUE if and only if extended attributes are equal
486 */
487 static hl_bool file_xattrs_equal(const struct file *a, const struct file *b)
488 {
489 ssize_t len_a;
490 ssize_t len_b;
491 char *names_a = NULL;
492 char *names_b = NULL;
493 int n_a;
494 int n_b;
495 const char **name_ptrs_a = NULL;
496 const char **name_ptrs_b = NULL;
497 void *value_a = NULL;
498 void *value_b = NULL;
499 hl_bool ret = FALSE;
500 int i;
501
502 assert(a->links != NULL);
503 assert(b->links != NULL);
504
505 jlog(JLOG_DEBUG1, "Comparing xattrs of %s to %s", a->links->path,
506 b->links->path);
507
508 stats.xattr_comparisons++;
509
510 len_a = llistxattr_or_die(a->links->path, NULL, 0);
511 len_b = llistxattr_or_die(b->links->path, NULL, 0);
512
513 if (len_a <= 0 && len_b <= 0)
514 return TRUE; // xattrs not supported or neither file has any
515
516 if (len_a != len_b)
517 return FALSE; // total lengths of xattr names differ
518
519 names_a = malloc_or_die(len_a);
520 names_b = malloc_or_die(len_b);
521
522 len_a = llistxattr_or_die(a->links->path, names_a, len_a);
523 len_b = llistxattr_or_die(b->links->path, names_b, len_b);
524 assert((len_a > 0) && (len_a == len_b));
525
526 n_a = get_xattr_name_count(names_a, len_a);
527 n_b = get_xattr_name_count(names_b, len_b);
528
529 if (n_a != n_b)
530 goto exit; // numbers of xattrs differ
531
532 name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a);
533 name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b);
534
535 // We now have two sorted tables of xattr names.
536
537 for (i = 0; i < n_a; i++) {
538 if (handle_interrupt())
539 goto exit; // user wants to quit
540
541 if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0)
542 goto exit; // names at same slot differ
543
544 len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0);
545 len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0);
546
547 if (len_a != len_b)
548 goto exit; // xattrs with same name, different value lengths
549
550 value_a = malloc_or_die(len_a);
551 value_b = malloc_or_die(len_b);
552
553 len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i],
554 value_a, len_a);
555 len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i],
556 value_b, len_b);
557 assert((len_a >= 0) && (len_a == len_b));
558
559 if (memcmp(value_a, value_b, len_a) != 0)
560 goto exit; // xattrs with same name, different values
561
562 free(value_a);
563 free(value_b);
564 value_a = NULL;
565 value_b = NULL;
566 }
567
568 ret = TRUE;
569
570 exit:
571 free(names_a);
572 free(names_b);
573 free(name_ptrs_a);
574 free(name_ptrs_b);
575 free(value_a);
576 free(value_b);
577 return ret;
578 }
579 #else
580 static hl_bool file_xattrs_equal(const struct file *a, const struct file *b)
581 {
582 return TRUE;
583 }
584 #endif
365585
366586 /**
367587 * file_contents_equal - Compare contents of two files for equality
453673 (!opts.respect_time || a->st.st_mtime == b->st.st_mtime) &&
454674 (!opts.respect_name
455675 || strcmp(a->links->path + a->links->basename,
456 b->links->path + b->links->basename) == 0)
457 && file_contents_equal(a, b));
676 b->links->path + b->links->basename) == 0) &&
677 (!opts.respect_xattrs || file_xattrs_equal(a, b)) &&
678 file_contents_equal(a, b));
458679 }
459680
460681 /**
725946 puts(" -o, --ignore-owner Ignore owner changes");
726947 puts(" -t, --ignore-time Ignore timestamps. Will retain the newer timestamp,");
727948 puts(" unless -m or -M is given");
949 #ifdef HAVE_XATTR
950 puts(" -X, --respect-xattrs Respect extended attributes");
951 #endif
728952 puts(" -m, --maximize Maximize the hardlink count, remove the file with");
729953 puts(" lowest hardlink cout");
730954 puts(" -M, --minimize Reverse the meaning of -m");
7901014 */
7911015 static int parse_options(int argc, char *argv[])
7921016 {
793 static const char optstr[] = "VhvnfpotcmMx:i:";
1017 static const char optstr[] = "VhvnfpotXcmMx:i:";
7941018 #ifdef HAVE_GETOPT_LONG
7951019 static const struct option long_options[] = {
7961020 {"version", no_argument, NULL, 'V'},
8011025 {"ignore-mode", no_argument, NULL, 'p'},
8021026 {"ignore-owner", no_argument, NULL, 'o'},
8031027 {"ignore-time", no_argument, NULL, 't'},
1028 {"respect-xattrs", no_argument, NULL, 'X'},
8041029 {"maximize", no_argument, NULL, 'm'},
8051030 {"minimize", no_argument, NULL, 'M'},
8061031 {"exclude", required_argument, NULL, 'x'},
8141039 opts.respect_mode = TRUE;
8151040 opts.respect_owner = TRUE;
8161041 opts.respect_time = TRUE;
1042 opts.respect_xattrs = FALSE;
8171043
8181044 while ((opt = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) {
8191045 switch (opt) {
8251051 break;
8261052 case 't':
8271053 opts.respect_time = FALSE;
1054 break;
1055 case 'X':
1056 opts.respect_xattrs = TRUE;
8281057 break;
8291058 case 'm':
8301059 opts.maximise = TRUE;
8431072 opts.respect_name = FALSE;
8441073 opts.respect_owner = FALSE;
8451074 opts.respect_time = FALSE;
1075 opts.respect_xattrs = FALSE;
8461076 break;
8471077 case 'n':
8481078 opts.dry_run = 1;
0 #! /bin/bash
1
2 # This creates a bunch of files, all with the same content, and with various
3 # combinations of xattrs. It lists them with their inode numbers, then runs
4 # hardlink on them, and then lists them with inodes again. Since the name of each
5 # file corresponds to its xattrs, you can easily see if expected linking is
6 # indeed happening.
7
# Scratch directory for the test files. Stop right away if it cannot be
# created: with an empty TMPDIR the redirections below would write into "/".
TMPDIR=$(mktemp -d /tmp/hardlinktest-XXXXXX) || exit 1
9
# makeFile PREFIX [NAME VALUE]... [NAME]
#
# Creates $TMPDIR/<PREFIX>-File-<NAME[:VALUE]>... and sets the matching
# user.* xattrs on it via setXattrs. An empty VALUE ("") or a trailing NAME
# without a VALUE yields an attribute with no value, shown as just <NAME> in
# the file name.
makeFile() {
    local fName=${1}-File
    shift
    # Keep the pairs in an array: flattening them into a string would drop
    # empty values and shift every later name/value by one position.
    local -a nameValPairs=("$@")
    while (( $# >= 1 )) ; do
        if (( $# == 1 )) ; then
            fName=${fName}-${1}
            shift 1
        else
            if [[ -z $2 ]] ; then
                fName=${fName}-${1}
            else
                fName=${fName}-${1}:${2}
            fi
            shift 2
        fi
    done
    echo "must have some content else can't set xattrs" > "$TMPDIR/$fName"
    setXattrs "$fName" "${nameValPairs[@]}"
}
31
# setXattrs FILE [NAME VALUE]... [NAME]
#
# Sets user.<NAME> on $TMPDIR/<FILE> for every pair. An empty VALUE ("") or
# a trailing NAME without a VALUE sets the attribute without a value.
# All expansions are quoted so values and paths are passed to setfattr as
# single arguments.
setXattrs() {
    local fName=${1}
    shift
    while (( $# >= 1 )) ; do
        if (( $# == 1 )) ; then
            setfattr -n "user.$1" "$TMPDIR/$fName"
            shift 1
        else
            if [[ -z $2 ]] ; then
                setfattr -n "user.$1" "$TMPDIR/$fName"
            else
                setfattr -n "user.$1" -v "$2" "$TMPDIR/$fName"
            fi
            shift 2
        fi
    done
}
51
# Build the same set of files twice (prefixes A and B) so that every xattr
# combination has exactly one counterpart to be linked with.
for prefix in A B
do
    # no xattrs at all
    makeFile "$prefix"

    # one attribute, with and without a value
    makeFile "$prefix" XXX XXX
    makeFile "$prefix" XXX

    # two attributes, in both orders, with and without values
    makeFile "$prefix" XXX XXX YYY YYY
    makeFile "$prefix" YYY YYY XXX XXX
    makeFile "$prefix" XXX "" YYY YYY
    makeFile "$prefix" XXX XXX YYY
    makeFile "$prefix" XXX "" YYY
    makeFile "$prefix" YYY "" XXX

    # three attributes in different orders
    makeFile "$prefix" XXX XXX YYY YYY ZZZ ZZZ
    makeFile "$prefix" YYY YYY XXX XXX ZZZ ZZZ
    makeFile "$prefix" ZZZ ZZZ YYY YYY XXX XXX

    # three attributes with values of different lengths
    makeFile "$prefix" XXX XXXXXXXXXX YYY YYYYYYYY ZZZ ZZZ
    makeFile "$prefix" YYY YYYYYYYY XXX XXXXXXXXXX ZZZ ZZZ
    makeFile "$prefix" ZZZ ZZZ YYY YYYYYYYY XXX XXXXXXXXXX

    # different names whose name lists have the same total length
    makeFile "$prefix" AA V BB V CC V DD V
    makeFile "$prefix" AAAAAAAAAA V BBBBBBBBBB V
done
76
# Show the attributes and inode numbers before linking. Bail out if the
# scratch directory cannot be entered, so an unrelated directory is never
# listed by mistake.
pushd "$TMPDIR" > /dev/null || exit 1
getfattr -d *
ls -1i | sort
popd > /dev/null

hardlink "$TMPDIR" --ignore-time --respect-xattrs --maximize

# Show the inode numbers again: files with equal xattrs should now share one.
pushd "$TMPDIR" > /dev/null || exit 1
ls -1i | sort
popd > /dev/null

rm -rf -- "$TMPDIR"