forked from simonsj/fdupes-jody
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathact_dedupefiles.c
214 lines (184 loc) · 7.69 KB
/
act_dedupefiles.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
/* BTRFS deduplication of file blocks
* This file is part of jdupes; see jdupes.c for license information */
#include "jdupes.h"
#ifdef ENABLE_DEDUPE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#ifdef __linux__
/* Use built-in static dedupe header if requested */
#ifdef STATIC_DEDUPE_H
#include "dedupe-static.h"
#else
#include <linux/fs.h>
#endif
/* If the Linux headers are too old, automatically use the static one */
#ifndef FILE_DEDUPE_RANGE_DIFFERS
#warning Automatically enabled STATIC_DEDUPE_H due to insufficient header support
#include "dedupe-static.h"
#endif
#include <sys/ioctl.h>
#include <sys/utsname.h>
#else
#error "Filesystem-managed deduplication only available for Linux."
#endif
#include "act_dedupefiles.h"
#define KERNEL_DEDUP_MAX_SIZE 16777216
/* Suffix appended to dedupe warnings, selected by a 0/1 "read-only" flag:
 * index 0 adds nothing, index 1 notes that write permission is missing. */
static const char *readonly_msg[] = {
  [0] = "",
  [1] = " (no write permission)",
};
/* Translate a per-file dedupe status code into a printable string.
 *
 * err: FILE_DEDUPE_RANGE_DIFFERS, a negated errno value, or anything else
 *      (which is reported as an unknown error).
 *
 * Returns either the buffer owned by strerror() or the shared 'tempname'
 * buffer (declared outside this file section), so the result must not be
 * freed and is only valid until either buffer is next overwritten. */
static char *dedupeerrstr(int err) {
  /* Force termination of the shared buffer before doing anything else */
  tempname[sizeof(tempname) - 1] = '\0';

  if (err == FILE_DEDUPE_RANGE_DIFFERS) {
    snprintf(tempname, sizeof(tempname), "%s",
        "FILE_DEDUPE_RANGE_DIFFERS (data modified in the meantime?)");
    return tempname;
  }

  /* Negative codes are negated errno values */
  if (err < 0) return strerror(-err);

  snprintf(tempname, sizeof(tempname), "Unknown error %d", err);
  return tempname;
}
/* Hand every duplicate set in the file list to the kernel for block-level
 * deduplication using the FIDEDUPERANGE ioctl.
 *
 * files: head of the file list, walked via ->next. Each entry flagged
 *        F_HAS_DUPES with a nonzero size is used as the dedupe source and
 *        the files on its ->duplicates chain are used as destinations.
 *
 * The process exits if the kernel release cannot be read, if it is a 2.x
 * kernel, or if the largest duplicate set exceeds 65535 files. Failures to
 * open or dedupe individual files are reported on stderr and the walk
 * continues with the next file or set. */
extern void dedupefiles(file_t * restrict files)
{
  struct utsname utsname;
  struct file_dedupe_range *same;
  char **dupe_filenames; /* maps to same->info indices */
  unsigned char *dupe_readonly; /* maps to same->info indices; 1 = no write permission */
  file_t *curfile;
  unsigned int n_dupes, max_dupes, cur_info;
  unsigned int cur_file = 0, max_files, total_files = 0;
  int fd;
  int ret, status, readonly, saved_errno;
  int64_t cur_offset;

  LOUD(fprintf(stderr, "\nRunning dedupefiles()\n");)

  /* Refuse to dedupe on 2.x kernels; they could damage user data */
  if (uname(&utsname)) {
    fprintf(stderr, "Failed to get kernel version! Aborting.\n");
    exit(EXIT_FAILURE);
  }
  LOUD(fprintf(stderr, "dedupefiles: uname got release '%s'\n", utsname.release));
  if (*(utsname.release) == '2' && *(utsname.release + 1) == '.') {
    fprintf(stderr, "Refusing to dedupe on a 2.x kernel; data loss could occur. Aborting.\n");
    exit(EXIT_FAILURE);
  }

  /* Find the largest dupe set, alloc space to hold structs for it */
  get_max_dupes(files, &max_dupes, &max_files);
  /* Kernel dupe count is a uint16_t so exit if the type's limit is exceeded */
  if (max_dupes > 65535) {
    fprintf(stderr, "Largest duplicate set (%u) exceeds the 65535-file dedupe limit.\n", max_dupes);
    fprintf(stderr, "Ask the program author to add this feature if you really need it. Exiting!\n");
    exit(EXIT_FAILURE);
  }
  same = calloc(sizeof(struct file_dedupe_range) +
      sizeof(struct file_dedupe_range_info) * max_dupes, 1);
  dupe_filenames = malloc(max_dupes * sizeof(char *));
  dupe_readonly = malloc(max_dupes * sizeof(unsigned char));
  LOUD(fprintf(stderr, "dedupefiles structs: alloc1 size %zu => %p, alloc2 size %zu => %p\n",
        sizeof(struct file_dedupe_range) + sizeof(struct file_dedupe_range_info) * max_dupes,
        (void *)same, max_dupes * sizeof(char *), (void *)dupe_filenames);)
  if (!same || !dupe_filenames || !dupe_readonly) oom("dedupefiles() structures");

  /* Main dedupe loop */
  while (files) {
    if (ISFLAG(files->flags, F_HAS_DUPES) && files->size) {
      cur_file++;
      if (!ISFLAG(flags, F_HIDEPROGRESS)) {
        fprintf(stderr, "Dedupe [%u/%u] %u%% \r", cur_file, max_files,
            cur_file * 100 / max_files);
      }

      /* Open each file to be deduplicated */
      cur_info = 0;
      for (curfile = files->duplicates; curfile; curfile = curfile->duplicates) {
        int errno2;

        /* Never allow hard links to be passed to dedupe */
        if (curfile->device == files->device && curfile->inode == files->inode) {
          LOUD(fprintf(stderr, "skipping hard linked file pair: '%s' = '%s'\n", curfile->d_name, files->d_name);)
          continue;
        }

        dupe_filenames[cur_info] = curfile->d_name;
        readonly = 0;
        if (access(curfile->d_name, W_OK) != 0) readonly = 1;

        fd = open(curfile->d_name, O_RDWR);
        /* Capture errno right away; the debug output below may overwrite it */
        errno2 = errno;
        LOUD(fprintf(stderr, "opening loop: open('%s', O_RDWR) [%d]\n", curfile->d_name, fd);)

        /* If read-write open fails, privileged users can dedupe in read-only mode */
        if (fd == -1) {
          /* errno2 keeps the read-write failure in case the fallback fails too */
          LOUD(fprintf(stderr, "opening loop: open('%s', O_RDWR) failed: %s\n", curfile->d_name, strerror(errno2));)
          fd = open(curfile->d_name, O_RDONLY);
          if (fd == -1) {
            LOUD(fprintf(stderr, "opening loop: fallback open('%s', O_RDONLY) failed: %s\n", curfile->d_name, strerror(errno));)
            fprintf(stderr, "Unable to open '%s': %s%s\n", curfile->d_name,
                strerror(errno2), readonly_msg[readonly]);
            continue;
          }
          LOUD(fprintf(stderr, "opening loop: fallback open('%s', O_RDONLY) succeeded\n", curfile->d_name);)
        }

        /* Record per-destination state so later warnings describe the right file */
        same->info[cur_info].dest_fd = fd;
        dupe_readonly[cur_info] = (unsigned char)readonly;
        cur_info++;
        total_files++;
      }
      n_dupes = cur_info;

      same->dest_count = (uint16_t)n_dupes; /* kernel type is __u16 */

      fd = open(files->d_name, O_RDONLY);
      saved_errno = errno;
      LOUD(fprintf(stderr, "source: open('%s', O_RDONLY) [%d]\n", files->d_name, fd);)
      if (fd == -1) {
        fprintf(stderr, "unable to open(\"%s\", O_RDONLY): %s\n", files->d_name, strerror(saved_errno));
        goto cleanup;
      }

      /* Call dedupe ioctl to pass the files to the kernel, one chunk of at
       * most KERNEL_DEDUP_MAX_SIZE bytes per call */
      ret = 0;
      saved_errno = 0;
      for (cur_offset = 0; cur_offset < files->size; cur_offset += KERNEL_DEDUP_MAX_SIZE) {
        same->src_offset = (uint64_t)cur_offset;
        for (cur_info = 0; cur_info < n_dupes; cur_info++) {
          same->info[cur_info].dest_offset = (uint64_t)cur_offset;
        }

        /* The final chunk covers only the bytes that remain */
        if (KERNEL_DEDUP_MAX_SIZE + cur_offset < files->size)
          same->src_length = (uint64_t)KERNEL_DEDUP_MAX_SIZE;
        else
          same->src_length = (uint64_t)(files->size - cur_offset);

        ret = ioctl(fd, FIDEDUPERANGE, same);
        if (ret < 0) {
          /* Keep the ioctl's errno; close() below may change it */
          saved_errno = errno;
          break;
        }
        LOUD(fprintf(stderr, "dedupe: ioctl('%s' [%d], FIDEDUPERANGE, same) => %d\n", files->d_name, fd, ret);)
      }
      if (close(fd) == -1) fprintf(stderr, "Unable to close(\"%s\"): %s\n", files->d_name, strerror(errno));

      if (ret < 0) {
        fprintf(stderr, "dedupe failed against file '%s' (%u matches): %s\n", files->d_name, n_dupes, strerror(saved_errno));
        goto cleanup;
      }

      /* NOTE(review): status and bytes_deduped are read once, after the chunk
       * loop, so only the values left by the last ioctl() call are reported */
      for (cur_info = 0; cur_info < n_dupes; cur_info++) {
        status = same->info[cur_info].status;
        if (status != 0) {
          if (same->info[cur_info].bytes_deduped == 0) {
            fprintf(stderr, "warning: dedupe failed: %s => %s: %s [%d]%s\n",
                files->d_name, dupe_filenames[cur_info], dedupeerrstr(status),
                status, readonly_msg[dupe_readonly[cur_info]]);
          } else {
            fprintf(stderr, "warning: dedupe only did %" PRIdMAX " bytes: %s => %s: %s [%d]%s\n",
                (intmax_t)same->info[cur_info].bytes_deduped, files->d_name,
                dupe_filenames[cur_info], dedupeerrstr(status), status,
                readonly_msg[dupe_readonly[cur_info]]);
          }
        }
      }

cleanup:
      /* Close every destination descriptor opened for this set */
      for (cur_info = 0; cur_info < n_dupes; cur_info++) {
        if (close((int)same->info[cur_info].dest_fd) == -1) {
          fprintf(stderr, "unable to close(\"%s\"): %s\n", dupe_filenames[cur_info],
              strerror(errno));
        }
      }

    } /* has dupes */

    files = files->next;
  }

  if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "Deduplication done (%u files processed)\n", total_files);
  free(same);
  free(dupe_filenames);
  free(dupe_readonly);
  return;
}
#endif /* ENABLE_DEDUPE */