/*
* run_dedupe.c
*
* Implements dedupe of duplicate extents from our results tree
*
* Copyright (C) 2014 SUSE. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* Authors: Mark Fasheh <mfasheh@suse.de>
*/
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <inttypes.h>
#include <glib.h>
#include "rbtree.h"
#include "list.h"
#include "csum.h"
#include "filerec.h"
#include "hash-tree.h"
#include "results-tree.h"
#include "dedupe.h"
#include "util.h"
#include "memstats.h"
#include "debug.h"
#include "run_dedupe.h"
/* Mode switches that are defined outside of this file. */
extern int block_dedupe;
extern int dedupe_same_file;
extern int fiemap_during_dedupe;
/*
* Taken around remove_extent(), dupe_extents_free() and
* remove_hashed_block() calls, and around the rb_next() step in
* push_extents().
*/
static GMutex mutex;
/* Held while a single console message is printed with several calls. */
static GMutex console_mutex;
/* Set by dedupe_results() when running in extent (non-block) mode. */
static struct results_tree *results_tree;
/*
* Progress counters printed as "(current/total)" by
* dedupe_extent_list(). Both are bumped with __sync_add_and_fetch().
*/
static volatile unsigned long long total_dedupe_passes;
static volatile unsigned long long curr_dedupe_pass;
/* Zero-padded field width used when printing the current pass number. */
static unsigned int leading_spaces;
/*
 * Print res->num_dupes followed by one table per dupe_extents entry in
 * the results tree. Each table lists the logical start offset and the
 * file name of every extent on that entry.
 */
void print_dupes_table(struct results_tree *res)
{
	struct rb_node *cur;
	struct dupe_extents *group;
	struct extent *ext;

	printf("Simple read and compare of file data found %u instances of "
	       "extents that might benefit from deduplication.\n",
	       res->num_dupes);

	/* Nothing to list, the summary line above is all we print. */
	if (res->num_dupes == 0)
		return;

	for (cur = rb_first(&res->root); cur != NULL; cur = rb_next(cur)) {
		group = rb_entry(cur, struct dupe_extents, de_node);

		printf("Showing %u identical extents of length %s with id ",
		       group->de_num_dupes, pretty_size(group->de_len));
		debug_print_digest_short(stdout, group->de_hash);
		printf("\n");
		printf("Start\t\tFilename\n");

		list_for_each_entry(ext, &group->de_extents, e_list) {
			printf("%s\t\"%s\"\n",
			       pretty_size(ext->e_loff),
			       ext->e_file->filename);
		}
	}
}
/*
 * Drain every result queued on @ctxt, reporting failures on stdout.
 *
 * @ctxt:       dedupe context whose results are popped one at a time
 * @kern_bytes: optional accumulator; the byte count of every popped
 *              result is added to it. May be NULL.
 */
static void process_dedupe_results(struct dedupe_ctxt *ctxt,
				   uint64_t *kern_bytes)
{
	int done = 0;
	int target_status;
	uint64_t target_loff, target_bytes;
	struct filerec *f;
	const char *status_str;

	while (!done) {
		done = pop_one_dedupe_result(ctxt, &target_status, &target_loff,
					     &target_bytes, &f);
		if (kern_bytes)
			*kern_bytes += target_bytes;

		/*
		 * Only print in case of error.
		 *
		 * Kernels older than 4.2 can't handle the target and
		 * dedupe files being the same and return -EINVAL in
		 * that case. Don't bubble it up so as to avoid user
		 * confusion.
		 */
		if (target_status == 0 ||
		    (target_status == -EINVAL && f == ctxt->ioctl_file))
			continue;

		/*
		 * Start from the fallback text on every result so that an
		 * unrecognized status never inherits the description of
		 * a previously reported one.
		 */
		status_str = "[unknown status]";
		if (target_status == BTRFS_SAME_DATA_DIFFERS)
			status_str = "data changed";
		else if (target_status < 0)
			status_str = strerror(-target_status);

		printf("[%p] Dedupe for file \"%s\" had status (%d) "
		       "\"%s\".\n",
		       g_thread_self(), f->filename, target_status,
		       status_str);
	}
}
/*
 * Refresh the shared-bytes / physical offset / physical length fields of
 * every extent on @dext via filerec_count_shared(). Extents whose file
 * cannot be opened are skipped; lookup failures are reported on stderr
 * and otherwise ignored.
 */
static void get_extent_info(struct dupe_extents *dext)
{
	struct extent *ext;

	list_for_each_entry(ext, &dext->de_extents, e_list) {
		struct filerec *rec = ext->e_file;
		int err;

		if (filerec_open(rec, 0) != 0)
			continue;

		/* Start from zero so stale counts never leak through. */
		extent_shared_bytes(ext) = 0;
		err = filerec_count_shared(rec, ext->e_loff, dext->de_len,
					   &extent_shared_bytes(ext),
					   &extent_poff(ext),
					   &extent_plen(ext));
		if (err != 0)
			fprintf(stderr, "%s: fiemap error %d: %s\n",
				rec->filename, err, strerror(err));

		filerec_close(rec);
	}
}
static void add_shared_extents(struct dupe_extents *dext, uint64_t *shared)
{
struct extent *extent;
list_for_each_entry(extent, &dext->de_extents, e_list)
*shared += extent_shared_bytes(extent);
}
/*
 * Returns 1 when the first physical extent backing @extent is shorter
 * than the duplicate (virtual) extent length recorded on @dext, and 0
 * otherwise.
 *
 * A shorter physical extent means we still want to dedupe, which
 * covers two situations:
 *
 * - Both files were appended to (separately) with duplicate data,
 *   leaving a pair of new extents that can be deduped.
 *
 * - Kernels before 4.2 rejected unaligned lengths, so a residual
 *   tail extent may still be waiting to be deduped.
 */
static int disk_extent_grew(struct dupe_extents *dext, struct extent *extent)
{
	return (extent_plen(extent) < dext->de_len) ? 1 : 0;
}
/*
* Removes extents which it believes have already been deduped. We err
* on the side of more deduping here.
*
* Every extent is compared against the extents that follow it in
* de_extents_root. A later extent is removed when it has a non-zero
* physical offset equal to the earlier one's and disk_extent_grew()
* does not ask for it to be kept.
*
* @ret_dext: in/out. Set to NULL when remove_extent() reports that
* nothing is left; the caller must not use the old pointer in that
* case.
*
* Does nothing when *ret_dext is NULL or has no dupes, or when
* block_dedupe is set.
*/
static void clean_deduped(struct dupe_extents **ret_dext)
{
int left;
int extents_kept = 0;
int first = 1;
struct dupe_extents *dext = *ret_dext;
struct rb_node *inner, *outer;
struct extent *inner_extent, *outer_extent;
if (!dext || dext->de_num_dupes == 0)
return;
if (block_dedupe)
return;
outer = rb_first(&dext->de_extents_root);
while (outer) {
outer_extent = rb_entry(outer, struct extent, e_node);
/*
* First extent will not be considered for removal
* below, which is fine as remove_extent() handles the
* case of only 1 extent left on the dext for us.
*
* Replicate the checks though and count it as kept if
* we don't want it deleted. That will trigger the
* logic below to save the dext if we should wind up
* throwing everything else out.
*/
if (first &&
(extent_poff(outer_extent) == 0 ||
disk_extent_grew(dext, outer_extent)))
extents_kept++;
first = 0;
inner = rb_next(outer);
while (inner) {
inner_extent = rb_entry(inner, struct extent, e_node);
/* Advance now: inner_extent may be removed below. */
inner = rb_next(inner);
/*
* Track if any extents have survived the
* culling. If we're down to the last two and
* at least one of them was deemed worthy,
* exit here so that it may be deduped.
*/
if (dext->de_num_dupes == 2 && extents_kept)
return;
/*
* The physical offset is filled in by the fiemap
* lookup in get_extent_info(); presumably it
* stays zero when that lookup fails. In that
* case, skip the extent (it might want to be
* deduped).
*/
if (extent_poff(inner_extent)
&& extent_poff(outer_extent) == extent_poff(inner_extent)
&& !disk_extent_grew(dext, inner_extent)) {
dprintf("Remove extent "
"(\"%s\", %"PRIu64", %"PRIu64")\n",
inner_extent->e_file->filename,
extent_poff(inner_extent),
extent_plen(inner_extent));
g_mutex_lock(&mutex);
left = remove_extent(results_tree,
inner_extent);
g_mutex_unlock(&mutex);
/* Nothing left: tell the caller to forget the dext. */
if (left == 0) {
*ret_dext = dext = NULL;
return;
}
} else
extents_kept++;
}
outer = rb_next(outer);
}
}
/*
 * Returned by dedupe_extent_list() when clean_deduped() left nothing to
 * dedupe; the caller must not reference the dext afterwards.
 */
#define DEDUPE_EXTENTS_CLEANED (-1)

/*
 * Run dedupe over every extent on @dext.
 *
 * The first extent whose file can be opened becomes the target; the
 * remaining extents are queued against it and submitted with
 * dedupe_extents() whenever the context is full or the list ends.
 *
 * @dext:         duplicate extents to process (at least 2 dupes)
 * @fiemap_bytes: when fiemap_during_dedupe is set, incremented by the
 *                increase in shared bytes observed across the run
 * @kern_bytes:   passed to process_dedupe_results() to be incremented
 *                by the bytes reported per result
 * @passno:       pass number, used only for progress output
 *
 * Returns 0 on success (ioctl and open failures are reported but not
 * propagated), ENOMEM when a context or request could not be
 * allocated, the error from init_all_extent_dedupe_info() if that
 * fails, or DEDUPE_EXTENTS_CLEANED (see above).
 */
static int dedupe_extent_list(struct dupe_extents *dext, uint64_t *fiemap_bytes,
			      uint64_t *kern_bytes, unsigned long long passno)
{
	int ret = 0;
	int last = 0;
	int rc;
	uint64_t shared_prev, shared_post;
	struct extent *extent;
	struct dedupe_ctxt *ctxt = NULL;
	uint64_t len = dext->de_len;
	OPEN_ONCE(open_files);
	struct extent *tgt_extent = NULL;

	abort_on(dext->de_num_dupes < 2);

	/* Announce the pass; the mutex keeps the three prints together. */
	g_mutex_lock(&console_mutex);
	printf("[%p] (%0*llu/%llu) Try to dedupe extents with id ",
	       g_thread_self(), leading_spaces, passno, total_dedupe_passes);
	debug_print_digest_short(stdout, dext->de_hash);
	printf("\n");
	g_mutex_unlock(&console_mutex);

	shared_prev = shared_post = 0ULL;

	/*
	 * Remove any extents which have already been deduped. This
	 * will free dext for us if the number of available extents
	 * goes below 2. If that happens, we return a special value so
	 * the caller knows not to reference dext any more.
	 */
	if (fiemap_during_dedupe) {
		ret = init_all_extent_dedupe_info(dext);
		if (ret)
			goto out;

		get_extent_info(dext);
		clean_deduped(&dext);
		if (!dext) {
			printf("[%p] Skipping - extents are already deduped.\n",
			       g_thread_self());
			return DEDUPE_EXTENTS_CLEANED;
		}

		/*
		 * Do this after clean_deduped as we may have removed some
		 * extents.
		 */
		add_shared_extents(dext, &shared_prev);
	}

	list_for_each_entry(extent, &dext->de_extents, e_list) {
		if (list_is_last(&extent->e_list, &dext->de_extents))
			last = 1;

		ret = filerec_open_once(extent->e_file, target_rw, &open_files);
		if (ret) {
			fprintf(stderr, "%s: Skipping dedupe.\n",
				extent->e_file->filename);
			/*
			 * If this was our last duplicate extent in
			 * the list, and we added dupes from a
			 * previous iteration of the loop we need to
			 * run dedupe before exiting.
			 */
			if (ctxt && last)
				goto run_dedupe;
			continue;
		}

		vprintf("[%p] Add extent for file \"%s\" at offset %s (%d)\n",
			g_thread_self(), extent->e_file->filename,
			pretty_size(extent->e_loff), extent->e_file->fd);

		if (ctxt == NULL) {
			if (tgt_extent == NULL) {
				/*
				 * We had some errors adding files
				 * previously and are down to the last
				 * dedupe candidate. Proceed only if
				 * we can guarantee two extents for
				 * dedupe (target, and this file).
				 */
				if (last)
					goto close_files;

				tgt_extent = extent;
			}

			ctxt = new_dedupe_ctxt(dext->de_num_dupes,
					       tgt_extent->e_loff, len,
					       tgt_extent->e_file);
			if (ctxt == NULL) {
				fprintf(stderr, "Out of memory while "
					"allocating dedupe context.\n");
				ret = ENOMEM;
				goto out;
			}

			/*
			 * If we just picked the target, it got added
			 * with the new context. Otherwise fall
			 * through to let other extents onto the
			 * dedupe ctxt.
			 */
			if (tgt_extent == extent)
				continue;
		}

		rc = add_extent_to_dedupe(ctxt, extent->e_loff, extent->e_file);
		if (rc) {
			if (rc < 0) {
				/* This can only be ENOMEM. */
				fprintf(stderr, "%s: Request not queued.\n",
					extent->e_file->filename);
				ret = ENOMEM;
				goto out;
			}

			/* Keep queueing until we run out of extents. */
			if (!last)
				continue;
		}

run_dedupe:
		/*
		 * We can get here with only the target extent (0
		 * queued) for many reasons. Skip the dedupe in that
		 * case but always do cleanup.
		 */
		if (ctxt->num_queued) {
			g_mutex_lock(&console_mutex);
			printf("[%p] Dedupe %u extents (id: ", g_thread_self(),
			       ctxt->num_queued);
			debug_print_digest_short(stdout, dext->de_hash);
			printf(") with target: (%s, %s), "
			       "\"%s\"\n",
			       pretty_size(ctxt->orig_file_off),
			       pretty_size(ctxt->orig_len),
			       ctxt->ioctl_file->filename);
			g_mutex_unlock(&console_mutex);

			ret = dedupe_extents(ctxt);
			if (ret == 0) {
				process_dedupe_results(ctxt, kern_bytes);
			} else {
				ret = errno;
				fprintf(stderr,
					"FAILURE: Dedupe ioctl returns %d: %s\n",
					ret, strerror(ret));
			}
		}

close_files:
		filerec_close_open_list(&open_files);
		free_dedupe_ctxt(ctxt);
		ctxt = NULL;

		if (!last) {
			/* reopen target file as it got closed above */
			ret = filerec_open_once(tgt_extent->e_file, target_rw,
						&open_files);
			if (ret) {
				/*
				 * Name the target here: it is the file
				 * we failed to open, not the extent we
				 * are currently iterating over.
				 */
				fprintf(stderr,
					"%s: Could not re-open as target.\n",
					tgt_extent->e_file->filename);
				break;
			}
		}
	}

	abort_on(ctxt != NULL);
	abort_on(!RB_EMPTY_ROOT(&open_files.root));

	if (fiemap_during_dedupe) {
		get_extent_info(dext);
		add_shared_extents(dext, &shared_post);
	}

	/*
	 * It's entirely possible that some other process is
	 * manipulating files underneath us. Take care not to
	 * report some randomly enormous 64 bit value.
	 */
	if (shared_prev < shared_post)
		*fiemap_bytes += shared_post - shared_prev;

	/* The only error we want to bubble up is ENOMEM */
	ret = 0;
out:
	/*
	 * ENOMEM error during context allocation may have caused open
	 * files to stay in our list.
	 */
	filerec_close_open_list(&open_files);
	/*
	 * We might have allocated a context above but not
	 * filled it with any extents, make sure to free it
	 * here.
	 */
	free_dedupe_ctxt(ctxt);

	abort_on(!RB_EMPTY_ROOT(&open_files.root));

	return ret;
}
/* Held by dedupe_worker() while it updates the shared struct dedupe_counts. */
static GMutex dedupe_counts_mutex;
/* Running totals that dedupe_worker() accumulates across all work items. */
struct dedupe_counts {
uint64_t kern_bytes; /* bytes summed up by process_dedupe_results() */
uint64_t fiemap_bytes; /* increase in shared bytes seen by dedupe_extent_list() */
};
/*
 * Extent-mode work item: claim the next pass number, dedupe @dext and
 * free it afterwards unless it was already disposed of.
 *
 * Returns 0 on success (including the "already deduped" case) or the
 * error from dedupe_extent_list().
 */
static int extent_dedupe_worker(struct dupe_extents *dext,
				uint64_t *fiemap_bytes, uint64_t *kern_bytes)
{
	unsigned long long pass = __sync_add_and_fetch(&curr_dedupe_pass, 1);
	int status = dedupe_extent_list(dext, fiemap_bytes, kern_bytes, pass);

	/* dext is gone already, there is nothing left for us to free. */
	if (status == DEDUPE_EXTENTS_CLEANED)
		return 0;

	/* dedupe_extent_list already printed to stderr for us */
	if (status != 0)
		return status;

	if (list_empty(&dext->de_extents))
		return 0;

	g_mutex_lock(&mutex);
	dupe_extents_free(dext, results_tree);
	g_mutex_unlock(&mutex);

	return 0;
}
/* One unit of block-dedupe work: a set of blocks that share a hash. */
struct block_dedupe_list
{
unsigned char bd_hash[DIGEST_LEN_MAX]; /* copied from dl_hash in alloc_bdl() */
struct list_head bd_block_list; /* file_block entries, linked via b_list */
struct hash_tree *bd_hash_tree; /* tree that free_bdl() removes the blocks from */
};
/* Defined further down; block_dedupe_worker() needs it earlier. */
static void free_bdl(struct block_dedupe_list *bdl);
/*
 * tgt_file/tgt_off here are used only when we are asked not to dedupe
 * within the same file - see block_dedupe_nosame(). Otherwise this is
 * very straight-forward. We walk the dupe blocks list and insert into
 * the result tree then run that through dedupe_extent_list().
 *
 * @bdl:          blocks sharing one hash
 * @res:          results tree that the blocks are inserted into
 * @tgt_file:     optional file to insert first so it becomes the target
 * @tgt_off:      offset within @tgt_file (ignored when it is NULL)
 * @fiemap_bytes: forwarded to dedupe_extent_list()
 * @kern_bytes:   forwarded to dedupe_extent_list()
 * @passno:       forwarded to dedupe_extent_list()
 *
 * Returns 0 on success or the error from insert_one_result() /
 * dedupe_extent_list(). The first entry of @res is always freed before
 * returning.
 */
static int __block_dedupe(struct block_dedupe_list *bdl,
			  struct results_tree *res,
			  struct filerec *tgt_file,
			  uint64_t tgt_off,
			  uint64_t *fiemap_bytes,
			  uint64_t *kern_bytes, unsigned long long passno)
{
	/* ret starts at 0 so no path can return an indeterminate value. */
	int ret = 0, one_old = 0;
	struct rb_node *node;
	struct dupe_extents *dext;
	struct file_block *block;

	if (tgt_file) {
		/* Insert this first so it gets picked as target. */
		ret = insert_one_result(res, bdl->bd_hash, tgt_file, tgt_off,
					blocksize);
		if (ret)
			return ret;

		if (filerec_deduped(tgt_file))
			one_old = 1;
	}

	list_for_each_entry(block, &bdl->bd_block_list, b_list) {
		/* Blocks of the target file are never deduped against it. */
		if (block->b_file == tgt_file)
			continue;

		if (filerec_deduped(block->b_file)) {
			/*
			 * Take one old result to dedupe from. That
			 * way we don't get into a situation where
			 * we've thrown out all blocks except a single
			 * rescanned one.
			 */
			if (one_old)
				continue;
			one_old = 1;
		}

		ret = insert_one_result(res, bdl->bd_hash, block->b_file,
					block->b_loff, blocksize);
		if (ret)
			goto out;
	}

	/*
	 * Test the node itself: rb_entry() only does pointer arithmetic,
	 * so its result is not a reliable NULL check for an empty tree.
	 */
	node = rb_first(&res->root);
	abort_on(!node);
	dext = rb_entry(node, struct dupe_extents, de_node);

	if (dext->de_num_dupes >= 2) {
		ret = dedupe_extent_list(dext, fiemap_bytes, kern_bytes,
					 passno);
		if (ret == DEDUPE_EXTENTS_CLEANED)
			ret = 0;
	}

out:
	/* The tree may be empty if the very first insert failed. */
	node = rb_first(&res->root);
	if (node) {
		dext = rb_entry(node, struct dupe_extents, de_node);
		g_mutex_lock(&mutex);
		dupe_extents_free(dext, res);
		g_mutex_unlock(&mutex);
	}

	return ret;
}
/*
 * Newer kernels (Linux v4.2+) can dedupe a file against itself. We
 * have no way of probing for that yet, so for now assume it is not
 * possible. Extent dedupe avoids the problem by never comparing a
 * file to itself; block dedupe has to pick its targets explicitly.
 *
 * Walk the block list once and report the first file seen plus the
 * first file that differs from it, together with the offsets of the
 * blocks they were found on. *tgt2 is NULL when every block belongs
 * to the same file. This is dumb but easy on memory and probably fast
 * enough.
 */
static void pick_target_files(struct block_dedupe_list *bdl,
			      struct filerec **tgt1, uint64_t *loff1,
			      struct filerec **tgt2, uint64_t *loff2)
{
	struct filerec *first = NULL;
	struct filerec *second = NULL;
	uint64_t first_off = 0;
	uint64_t second_off = 0;
	struct file_block *blk;

	list_for_each_entry(blk, &bdl->bd_block_list, b_list) {
		if (first == NULL) {
			first = blk->b_file;
			first_off = blk->b_loff;
		} else if (blk->b_file != first) {
			second = blk->b_file;
			second_off = blk->b_loff;
		}

		/* Two distinct files found, no need to look further. */
		if (first != NULL && second != NULL)
			break;
	}

	*tgt1 = first;
	*loff1 = first_off;
	*tgt2 = second;
	*loff2 = second_off;
}
/*
 * Block dedupe for kernels without same file dedupe (< v4.2).
 *
 * Picks two distinct files from @bdl and runs __block_dedupe() once
 * with each of them as the target. When all blocks belong to a single
 * file nothing is deduped and 0 is returned.
 */
static int block_dedupe_nosame(struct block_dedupe_list *bdl,
			       struct results_tree *res, uint64_t *fiemap_bytes,
			       uint64_t *kern_bytes, unsigned long long passno)
{
	struct filerec *first, *second;
	uint64_t first_off = 0;
	uint64_t second_off = 0;
	int err;

	pick_target_files(bdl, &first, &first_off, &second, &second_off);

	if (second != NULL) {
		err = __block_dedupe(bdl, res, first, first_off, fiemap_bytes,
				     kern_bytes, passno);
		if (err)
			return err;

		return __block_dedupe(bdl, res, second, second_off,
				      fiemap_bytes, kern_bytes, passno);
	}

	/* Can't dedupe this in nosame mode */
	if (verbose) {
		printf("[%p] Can not dedupe hash ", g_thread_self());
		debug_print_digest_short(stdout, bdl->bd_hash);
		printf(" in nosame mode - all hashes belong to %s.\n",
		       first->filename);
	}

	return 0;
}
static int block_dedupe_worker(struct block_dedupe_list *bdl,
uint64_t *fiemap_bytes, uint64_t *kern_bytes)
{
int ret;
struct results_tree res;
unsigned long long passno = __sync_add_and_fetch(&curr_dedupe_pass, 1);
init_results_tree(&res);
if (!dedupe_same_file)
ret = block_dedupe_nosame(bdl, &res, fiemap_bytes,
kern_bytes, passno);
else
ret = __block_dedupe(bdl, &res, NULL, 0, fiemap_bytes,
kern_bytes, passno);
free_bdl(bdl);
return ret;
}
/*
 * Thread pool entry point. @priv is a block_dedupe_list in block mode
 * and a dupe_extents otherwise. The byte counts gathered for this work
 * item are folded into @counts under dedupe_counts_mutex.
 */
static int dedupe_worker(void *priv, struct dedupe_counts *counts)
{
	uint64_t shared_delta = 0ULL;
	uint64_t kernel_total = 0ULL;
	int status;

	status = block_dedupe
		? block_dedupe_worker(priv, &shared_delta, &kernel_total)
		: extent_dedupe_worker(priv, &shared_delta, &kernel_total);

	g_mutex_lock(&dedupe_counts_mutex);
	counts->fiemap_bytes += shared_delta;
	counts->kern_bytes += kernel_total;
	g_mutex_unlock(&dedupe_counts_mutex);

	return status;
}
/* Created in dedupe_results(); push_bdl() and push_extents() queue work on it. */
static GThreadPool *dedupe_pool = NULL;
/*
 * Allocate a block_dedupe_list for @dups with an empty block list, the
 * hash copied from @dups and @tree recorded for later cleanup.
 *
 * Returns NULL when out of memory.
 */
struct block_dedupe_list *alloc_bdl(struct hash_tree *tree,
				    struct dupe_blocks_list *dups)
{
	struct block_dedupe_list *fresh = malloc(sizeof *fresh);

	if (fresh == NULL)
		return NULL;

	memcpy(&fresh->bd_hash, &dups->dl_hash, DIGEST_LEN_MAX);
	INIT_LIST_HEAD(&fresh->bd_block_list);
	fresh->bd_hash_tree = tree;

	return fresh;
}
/*
 * Unlink every block from @bdl, remove it from the owning hash tree
 * (under the global mutex) and free @bdl itself. NULL is accepted.
 */
static void free_bdl(struct block_dedupe_list *bdl)
{
	struct file_block *cur, *next;

	if (bdl == NULL)
		return;

	list_for_each_entry_safe(cur, next, &bdl->bd_block_list, b_list) {
		list_del_init(&cur->b_list);

		g_mutex_lock(&mutex);
		remove_hashed_block(bdl->bd_hash_tree, cur);
		g_mutex_unlock(&mutex);
	}

	free(bdl);
}
/*
 * Hand @bdl to the dedupe thread pool.
 *
 * Returns 0 on success, 1 if the pool reported an error (which is
 * printed to stderr).
 */
static int push_bdl(struct block_dedupe_list *bdl)
{
	GError *gerr = NULL;

	g_thread_pool_push(dedupe_pool, bdl, &gerr);
	if (gerr == NULL)
		return 0;

	fprintf(stderr, "Fatal error while deduping: %s\n",
		gerr->message);
	g_error_free(gerr);

	return 1;
}
/*
 * Don't queue more than DEDUPE_MAX dedupes on any one thread. We want
 * this because large lists (in the multiple millions) can wind up
 * taking longer on one thread than all the other lists. This happens
 * because a common pattern is only one or two extremely large buckets
 * (think zeros) with the rest having a drastically smaller number of
 * duplicates.
 */
#define DEDUPE_MAX (1000000)

/*
 * Queue the blocks of @dups on the dedupe thread pool.
 *
 * Lists shorter than DEDUPE_MAX are pushed as one work item. Longer
 * lists are cut into roughly dl_num_elem / io_threads sized pieces, one
 * per thread, and total_dedupe_passes is bumped for every piece.
 *
 * Returns 0 on success, ENOMEM if a work item could not be allocated
 * and the non-zero result of push_bdl() if queueing failed.
 */
static int __push_blocks(struct hash_tree *hashes,
			 struct dupe_blocks_list *dups)
{
	int i, ret = 0;
	unsigned long long j;
	struct block_dedupe_list *bdl;
	struct file_block *block;

	if (dups->dl_num_elem < DEDUPE_MAX) {
		/* the easy way */
		bdl = alloc_bdl(hashes, dups);
		if (!bdl) {
			fprintf(stderr, "Fatal: out of memory while deduping\n");
			ret = ENOMEM;
			goto out;
		}
		list_splice_init(&dups->dl_list, &bdl->bd_block_list);

		/*
		 * Keep the push result in ret so the failure reaches
		 * our caller instead of being reported as success.
		 *
		 * NOTE(review): GLib documents that the data is still
		 * appended to the queue when g_thread_pool_push()
		 * reports an error - confirm that freeing bdl at 'out'
		 * is safe in that case.
		 */
		ret = push_bdl(bdl);
		if (ret)
			goto out;
	} else {
		unsigned long long smax = dups->dl_num_elem / io_threads;

		if (verbose) {
			printf("Hash ");
			debug_print_digest_short(stdout, dups->dl_hash);
			printf(" has %llu elements. It will be spread amongst "
			       "all dedupe threads.\n", dups->dl_num_elem);
		}

		for (i = 0; i < io_threads; i++) {
			j = 0;
			list_for_each_entry(block, &dups->dl_list, b_list) {
				j++;
				/*
				 * Give the last thread any leftovers
				 * even if that goes above our soft
				 * max.
				 */
				if (list_is_last(&block->b_list, &dups->dl_list)
				    || (j == smax && i != (io_threads - 1))) {
					bdl = alloc_bdl(hashes, dups);
					if (!bdl) {
						fprintf(stderr,
							"Fatal: out of memory"
							" while deduping\n");
						ret = ENOMEM;
						goto out;
					}

					/* Move everything up to 'block' onto bdl. */
					list_cut_position(&bdl->bd_block_list,
							  &dups->dl_list,
							  &block->b_list);

					/* Propagate failures, see above. */
					ret = push_bdl(bdl);
					if (ret)
						goto out;
					bdl = NULL;

					__sync_add_and_fetch(
						&total_dedupe_passes, 1);
					break;
				}
			}
		}
	}

	bdl = NULL; /* freed by thread */
out:
	free_bdl(bdl);
	return ret;
}
/*
 * Queue every hash bucket of @hashes that holds at least two blocks.
 *
 * Returns 0 on success, 1 as soon as __push_blocks() reports an error.
 */
static int push_blocks(struct hash_tree *hashes)
{
	struct dupe_blocks_list *bucket;

	list_for_each_entry(bucket, &hashes->size_list, dl_size_list) {
		/* A single block has nothing to be deduped against. */
		if (bucket->dl_num_elem >= 2 &&
		    __push_blocks(hashes, bucket) != 0)
			return 1;
	}

	return 0;
}
/*
 * Queue every dupe_extents entry of @res on the dedupe thread pool.
 *
 * Returns 0 on success and 1 if the pool reported an error. Errors
 * from this function are fatal.
 */
static int push_extents(struct results_tree *res)
{
	struct rb_node *cur = rb_first(&res->root);
	GError *gerr = NULL;

	while (cur != NULL) {
		struct dupe_extents *item =
			rb_entry(cur, struct dupe_extents, de_node);

		/*
		 * The dedupe threads may free 'item' once it has been
		 * pushed, so step to the next node first. The lock
		 * keeps rb_erase from changing the tree while we do.
		 */
		g_mutex_lock(&mutex);
		cur = rb_next(cur);
		g_mutex_unlock(&mutex);

		g_thread_pool_push(dedupe_pool, item, &gerr);
		if (gerr != NULL) {
			fprintf(stderr, "Fatal error while deduping: %s\n",
				gerr->message);
			g_error_free(gerr);
			return 1;
		}
	}

	return 0;
}
/*
 * Run the dedupe phase.
 *
 * In extent mode (@res) the duplicate table is printed first and
 * nothing else happens when the tree is empty. A thread pool of
 * io_threads workers is then created, all work is queued from either
 * @hashes (block mode) or @res, and the pool is drained. A summary of
 * the processed bytes is printed only when queueing succeeded.
 *
 * @res:    results tree, used when block_dedupe is not set
 * @hashes: hash tree, used when block_dedupe is set
 */
void dedupe_results(struct results_tree *res, struct hash_tree *hashes)
{
	int ret;
	struct dedupe_counts counts = { 0ULL, };
	GError *err = NULL;

	if (!block_dedupe) {
		results_tree = res;
		print_dupes_table(res);

		if (RB_EMPTY_ROOT(&res->root)) {
			printf("Nothing to dedupe.\n");
			return;
		}
	}

	printf("Using %u threads for dedupe phase\n", io_threads);

	dedupe_pool = g_thread_pool_new((GFunc) dedupe_worker, &counts,
					io_threads, TRUE, &err);
	if (err) {
		fprintf(stderr, "Unable to create dedupe thread pool: %s\n",
			err->message);
		g_error_free(err);
		return;
	}

	if (block_dedupe) {
		total_dedupe_passes = hashes->num_hashes;
		leading_spaces = num_digits(total_dedupe_passes);
		ret = push_blocks(hashes);
	} else {
		total_dedupe_passes = res->num_dupes;
		leading_spaces = num_digits(total_dedupe_passes);
		ret = push_extents(res);
	}

	if (ret) {
		/*
		 * 'err' is always NULL at this point (pool creation
		 * succeeded and the push helpers use their own GError),
		 * so it must not be dereferenced or freed here. The
		 * helpers already reported the specific cause.
		 */
		fprintf(stderr, "Fatal error while deduping.\n");
	}

	/* Not immediate, and wait: let the queued work finish first. */
	g_thread_pool_free(dedupe_pool, FALSE, TRUE);

	if (ret == 0) {
		if (fiemap_during_dedupe) {
			printf("Kernel processed data (excludes target files): "
			       "%s\nComparison of extent info shows a net "
			       "change in shared extents of: %s\n",
			       pretty_size(counts.kern_bytes),
			       pretty_size(counts.fiemap_bytes));
		} else {
			printf("Kernel processed data (excludes target files): "
			       "%s\n", pretty_size(counts.kern_bytes));
		}
	}
}
/*
 * Whole-file dedupe of every entry on filerec_list.
 *
 * The first file that opens becomes the target of a new context; the
 * following files are queued against it. The context is submitted when
 * add_extent_to_dedupe() returns 0 or when the end of the list is
 * reached, after which the next file starts a fresh context.
 *
 * Returns 0 on success, ENOMEM on allocation failure or the errno of a
 * failed dedupe_extents() call. All file records are freed before
 * returning.
 */
int fdupes_dedupe(void)
{
	int ret;
	int last;
	struct filerec *file;
	struct dedupe_ctxt *ctxt = NULL;
	uint64_t bytes = 0;
	OPEN_ONCE(open_files);

	list_for_each_entry(file, &filerec_list, rec_list) {
		last = list_is_last(&file->rec_list, &filerec_list);

		ret = filerec_open_once(file, 0, &open_files);
		if (ret) {
			fprintf(stderr, "%s: Skipping dedupe.\n",
				file->filename);
			/*
			 * If this was the last file and earlier
			 * iterations queued requests, submit them now
			 * instead of silently dropping them at 'out'.
			 */
			if (last && ctxt && ctxt->num_queued)
				goto run_dedupe;
			continue;
		}

		printf("Queue entire file for dedupe: %s\n", file->filename);

		if (ctxt == NULL) {
			/* This file becomes the target of a new context. */
			ctxt = new_dedupe_ctxt(MAX_DEDUPES_PER_IOCTL,
					       0, file->size, file);
			if (ctxt == NULL) {
				fprintf(stderr, "Out of memory while "
					"allocating dedupe context.\n");
				ret = ENOMEM;
				goto out;
			}
			continue;
		}

		ret = add_extent_to_dedupe(ctxt, 0, file);
		if (ret < 0) {
			fprintf(stderr, "%s: Request not queued.\n",
				file->filename);
			ret = ENOMEM;
			goto out;
		}

		/* Keep queueing until we hit a 0 return or the list ends. */
		if (ret > 0 && !last)
			continue;

run_dedupe:
		ret = dedupe_extents(ctxt);
		if (ret) {
			ret = errno;
			fprintf(stderr,
				"FAILURE: Dedupe ioctl returns %d: %s\n",
				ret, strerror(ret));
			goto out;
		}

		filerec_close_open_list(&open_files);
		process_dedupe_results(ctxt, &bytes);
		free_dedupe_ctxt(ctxt);
		ctxt = NULL;

		printf("Dedupe pass on %llu files completed\n",
		       num_filerecs);
	}

	ret = 0;
out:
	filerec_close_open_list(&open_files);
	free_dedupe_ctxt(ctxt);
	free_all_filerecs();
	return ret;
}