[PATCH] diff-tree: --find-copies-harder
[git.git] / diff-tree.c
#include <ctype.h>
#include <string.h>
#include "cache.h"
#include "diff.h"
#include "commit.h"
5
6 static int show_root_diff = 0;
7 static int verbose_header = 0;
8 static int ignore_merges = 1;
9 static int recursive = 0;
10 static int show_tree_entry_in_recursive = 0;
11 static int read_stdin = 0;
12 static int diff_output_format = DIFF_FORMAT_HUMAN;
13 static int detect_rename = 0;
14 static int find_copies_harder = 0;
15 static int diff_setup_opt = 0;
16 static int diff_score_opt = 0;
17 static const char *pickaxe = NULL;
18 static int pickaxe_opts = 0;
19 static int diff_break_opt = -1;
20 static const char *orderfile = NULL;
21 static const char *header = NULL;
22 static const char *header_prefix = "";
23 static enum cmit_fmt commit_format = CMIT_FMT_RAW;
24
25 // What paths are we interested in?
26 static int nr_paths = 0;
27 static const char **paths = NULL;
28 static int *pathlens = NULL;
29
30 static int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base);
31
/*
 * Step *bufp / *sizep past the tree entry at the front of the buffer.
 *
 * An entry is a NUL-terminated string followed by 20 bytes of SHA-1.
 * The terminator is searched for only within the *sizep bytes that are
 * actually available, so a truncated or otherwise corrupt tree is
 * reported through die() instead of being read past its end.
 */
static void update_tree_entry(void **bufp, unsigned long *sizep)
{
	char *entry = *bufp;
	unsigned long size = *sizep;
	const char *nul = memchr(entry, '\0', size);
	unsigned long len;

	if (!nul)
		die("corrupt tree file");

	/* string + its NUL + the 20-byte SHA-1 that follows */
	len = (unsigned long)(nul - entry) + 1 + 20;
	if (size < len)
		die("corrupt tree file");

	*bufp = entry + len;
	*sizep = size - len;
}
43
/*
 * Decode the tree entry at the front of "tree" ("size" bytes available).
 *
 * The entry starts with a NUL-terminated "<octal mode> <path>" string,
 * immediately followed by 20 bytes of SHA-1.  On success *pathp points at
 * the path inside the buffer, *modep receives the mode after
 * DIFF_FILE_CANON_MODE(), and the return value points at the SHA-1.
 *
 * The NUL is looked up with memchr() bounded by "size" so that a corrupt
 * entry cannot make us scan beyond the buffer; any malformed entry ends
 * in die().
 */
static const unsigned char *extract(void *tree, unsigned long size, const char **pathp, unsigned int *modep)
{
	const char *entry = tree;
	const char *nul = memchr(entry, '\0', size);
	const char *space;
	unsigned long len;
	unsigned int mode;

	if (!nul)
		die("corrupt tree file");

	/* From here on the entry is known to be NUL-terminated in bounds. */
	len = (unsigned long)(nul - entry) + 1;
	space = strchr(entry, ' ');
	if (!space || size < len + 20 || sscanf(entry, "%o", &mode) != 1)
		die("corrupt tree file");

	*pathp = space + 1;
	*modep = DIFF_FILE_CANON_MODE(mode);
	return (const unsigned char *)entry + len;
}
57
/*
 * Build "<base><path>/" in freshly allocated memory; only the first
 * "pathlen" bytes of "path" are used.  The caller frees the result.
 */
static char *malloc_base(const char *base, const char *path, int pathlen)
{
	int baselen = strlen(base);
	char *newbase = xmalloc(baselen + pathlen + 2);
	char *cursor = newbase;

	memcpy(cursor, base, baselen);
	cursor += baselen;
	memcpy(cursor, path, pathlen);
	cursor += pathlen;
	*cursor++ = '/';
	*cursor = '\0';
	return newbase;
}
67
68 static void show_file(const char *prefix, void *tree, unsigned long size, const char *base);
69 static void show_tree(const char *prefix, void *tree, unsigned long size, const char *base);
70
71 /* A file entry went away or appeared */
72 static void show_file(const char *prefix, void *tree, unsigned long size, const char *base)
73 {
74         unsigned mode;
75         const char *path;
76         const unsigned char *sha1 = extract(tree, size, &path, &mode);
77
78         if (recursive && S_ISDIR(mode)) {
79                 char type[20];
80                 unsigned long size;
81                 char *newbase = malloc_base(base, path, strlen(path));
82                 void *tree;
83
84                 tree = read_sha1_file(sha1, type, &size);
85                 if (!tree || strcmp(type, "tree"))
86                         die("corrupt tree sha %s", sha1_to_hex(sha1));
87
88                 show_tree(prefix, tree, size, newbase);
89
90                 free(tree);
91                 free(newbase);
92                 return;
93         }
94
95         diff_addremove(prefix[0], mode, sha1, base, path);
96 }
97
98 static int compare_tree_entry(void *tree1, unsigned long size1, void *tree2, unsigned long size2, const char *base)
99 {
100         unsigned mode1, mode2;
101         const char *path1, *path2;
102         const unsigned char *sha1, *sha2;
103         int cmp, pathlen1, pathlen2;
104
105         sha1 = extract(tree1, size1, &path1, &mode1);
106         sha2 = extract(tree2, size2, &path2, &mode2);
107
108         pathlen1 = strlen(path1);
109         pathlen2 = strlen(path2);
110         cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2);
111         if (cmp < 0) {
112                 show_file("-", tree1, size1, base);
113                 return -1;
114         }
115         if (cmp > 0) {
116                 show_file("+", tree2, size2, base);
117                 return 1;
118         }
119         if (!find_copies_harder && !memcmp(sha1, sha2, 20) && mode1 == mode2)
120                 return 0;
121
122         /*
123          * If the filemode has changed to/from a directory from/to a regular
124          * file, we need to consider it a remove and an add.
125          */
126         if (S_ISDIR(mode1) != S_ISDIR(mode2)) {
127                 show_file("-", tree1, size1, base);
128                 show_file("+", tree2, size2, base);
129                 return 0;
130         }
131
132         if (recursive && S_ISDIR(mode1)) {
133                 int retval;
134                 char *newbase = malloc_base(base, path1, pathlen1);
135                 if (show_tree_entry_in_recursive)
136                         diff_change(mode1, mode2, sha1, sha2, base, path1);
137                 retval = diff_tree_sha1(sha1, sha2, newbase);
138                 free(newbase);
139                 return retval;
140         }
141
142         diff_change(mode1, mode2, sha1, sha2, base, path1);
143         return 0;
144 }
145
146 static int interesting(void *tree, unsigned long size, const char *base)
147 {
148         const char *path;
149         unsigned mode;
150         int i;
151         int baselen, pathlen;
152
153         if (!nr_paths)
154                 return 1;
155
156         (void)extract(tree, size, &path, &mode);
157
158         pathlen = strlen(path);
159         baselen = strlen(base);
160
161         for (i=0; i < nr_paths; i++) {
162                 const char *match = paths[i];
163                 int matchlen = pathlens[i];
164
165                 if (baselen >= matchlen) {
166                         /* If it doesn't match, move along... */
167                         if (strncmp(base, match, matchlen))
168                                 continue;
169
170                         /* The base is a subdirectory of a path which was specified. */
171                         return 1;
172                 }
173
174                 /* Does the base match? */
175                 if (strncmp(base, match, baselen))
176                         continue;
177
178                 match += baselen;
179                 matchlen -= baselen;
180
181                 if (pathlen > matchlen)
182                         continue;
183
184                 if (matchlen > pathlen) {
185                         if (match[pathlen] != '/')
186                                 continue;
187                         if (!S_ISDIR(mode))
188                                 continue;
189                 }
190
191                 if (strncmp(path, match, pathlen))
192                         continue;
193
194                 return 1;
195         }
196         return 0; /* No matches */
197 }
198
199 /* A whole sub-tree went away or appeared */
200 static void show_tree(const char *prefix, void *tree, unsigned long size, const char *base)
201 {
202         while (size) {
203                 if (find_copies_harder || interesting(tree, size, base))
204                         show_file(prefix, tree, size, base);
205                 update_tree_entry(&tree, &size);
206         }
207 }
208
209 static int diff_tree(void *tree1, unsigned long size1, void *tree2, unsigned long size2, const char *base)
210 {
211         while (size1 | size2) {
212                 if (nr_paths && size1 && !interesting(tree1, size1, base)) {
213                         update_tree_entry(&tree1, &size1);
214                         continue;
215                 }
216                 if (nr_paths && size2 && !interesting(tree2, size2, base)) {
217                         update_tree_entry(&tree2, &size2);
218                         continue;
219                 }
220                 if (!size1) {
221                         show_file("+", tree2, size2, base);
222                         update_tree_entry(&tree2, &size2);
223                         continue;
224                 }
225                 if (!size2) {
226                         show_file("-", tree1, size1, base);
227                         update_tree_entry(&tree1, &size1);
228                         continue;
229                 }
230                 switch (compare_tree_entry(tree1, size1, tree2, size2, base)) {
231                 case -1:
232                         update_tree_entry(&tree1, &size1);
233                         continue;
234                 case 0:
235                         update_tree_entry(&tree1, &size1);
236                         /* Fallthrough */
237                 case 1:
238                         update_tree_entry(&tree2, &size2);
239                         continue;
240                 }
241                 die("git-diff-tree: internal error");
242         }
243         return 0;
244 }
245
/*
 * Read the trees referenced by "old" and "new" and diff them with
 * diff_tree(), using "base" as the path prefix.  Dies when either tree
 * cannot be read; otherwise returns what diff_tree() returned.
 */
static int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base)
{
	unsigned long old_size, new_size;
	void *old_tree, *new_tree;
	int status;

	old_tree = read_object_with_reference(old, "tree", &old_size, NULL);
	if (!old_tree)
		die("unable to read source tree (%s)", sha1_to_hex(old));

	new_tree = read_object_with_reference(new, "tree", &new_size, NULL);
	if (!new_tree)
		die("unable to read destination tree (%s)", sha1_to_hex(new));

	status = diff_tree(old_tree, old_size, new_tree, new_size, base);

	free(old_tree);
	free(new_tree);
	return status;
}
263
264 static void call_diff_setup(void)
265 {
266         diff_setup(diff_setup_opt);
267 }
268
269 static int call_diff_flush(void)
270 {
271         diffcore_std(find_copies_harder ? paths : 0,
272                      detect_rename, diff_score_opt,
273                      pickaxe, pickaxe_opts,
274                      diff_break_opt,
275                      orderfile);
276         if (diff_queue_is_empty()) {
277                 diff_flush(DIFF_FORMAT_NO_OUTPUT, 0);
278                 return 0;
279         }
280         if (header) {
281                 const char *fmt = "%s";
282                 if (diff_output_format == DIFF_FORMAT_MACHINE)
283                         fmt = "%s%c";
284                 
285                 printf(fmt, header, 0);
286                 header = NULL;
287         }
288         diff_flush(diff_output_format, 1);
289         return 1;
290 }
291
/*
 * Top-level tree-to-tree diff: set up the diff machinery, compare the two
 * trees, then flush the output.  Returns the diff_tree_sha1() result.
 */
static int diff_tree_sha1_top(const unsigned char *old,
			      const unsigned char *new, const char *base)
{
	int status;

	call_diff_setup();
	status = diff_tree_sha1(old, new, base);
	call_diff_flush();

	return status;
}
302
/*
 * Diff the tree referenced by "new" against an empty tree, so that every
 * entry shows up as an addition.  Dies if the tree cannot be read.
 */
static int diff_root_tree(const unsigned char *new, const char *base)
{
	unsigned long root_size;
	void *root;
	int status;

	call_diff_setup();

	root = read_object_with_reference(new, "tree", &root_size, NULL);
	if (!root)
		die("unable to read root tree (%s)", sha1_to_hex(new));

	/* An empty buffer of size 0 stands in for the missing old side. */
	status = diff_tree("", 0, root, root_size, base);
	free(root);

	call_diff_flush();
	return status;
}
318
319 static char *generate_header(const char *commit, const char *parent, const char *msg, unsigned long len)
320 {
321         static char this_header[16384];
322         int offset;
323
324         offset = sprintf(this_header, "%s%s (from %s)\n", header_prefix, commit, parent);
325         if (verbose_header) {
326                 offset += pretty_print_commit(commit_format, msg, len, this_header + offset, sizeof(this_header) - offset);
327                 this_header[offset++] = '\n';
328                 this_header[offset++] = 0;
329         }
330
331         return this_header;
332 }
333
334 static int diff_tree_commit(const unsigned char *commit, const char *name)
335 {
336         unsigned long size, offset;
337         char *buf = read_object_with_reference(commit, "commit", &size, NULL);
338
339         if (!buf)
340                 return -1;
341
342         if (!name) {
343                 static char commit_name[60];
344                 strcpy(commit_name, sha1_to_hex(commit));
345                 name = commit_name;
346         }
347
348         /* Root commit? */
349         if (show_root_diff && memcmp(buf + 46, "parent ", 7)) {
350                 header = generate_header(name, "root", buf, size);
351                 diff_root_tree(commit, "");
352         }
353
354         /* More than one parent? */
355         if (ignore_merges) {
356                 if (!memcmp(buf + 46 + 48, "parent ", 7))
357                         return 0;
358         }
359
360         offset = 46;
361         while (offset + 48 < size && !memcmp(buf + offset, "parent ", 7)) {
362                 unsigned char parent[20];
363                 if (get_sha1_hex(buf + offset + 7, parent))
364                         return -1;
365                 header = generate_header(name, sha1_to_hex(parent), buf, size);
366                 diff_tree_sha1_top(parent, commit, "");
367                 if (!header && verbose_header) {
368                         header_prefix = "\ndiff-tree ";
369                         /*
370                          * Don't print multiple merge entries if we
371                          * don't print the diffs.
372                          */
373                 }
374                 offset += 48;
375         }
376         return 0;
377 }
378
379 static int diff_tree_stdin(char *line)
380 {
381         int len = strlen(line);
382         unsigned char commit[20], parent[20];
383         static char this_header[1000];
384
385         if (!len || line[len-1] != '\n')
386                 return -1;
387         line[len-1] = 0;
388         if (get_sha1_hex(line, commit))
389                 return -1;
390         if (isspace(line[40]) && !get_sha1_hex(line+41, parent)) {
391                 line[40] = 0;
392                 line[81] = 0;
393                 sprintf(this_header, "%s (from %s)\n", line, line+41);
394                 header = this_header;
395                 return diff_tree_sha1_top(parent, commit, "");
396         }
397         line[40] = 0;
398         return diff_tree_commit(commit, line);
399 }
400
/*
 * Usage text.  Lists every option accepted by the parser in main(),
 * including --find-copies-harder, -B, --pickaxe-all and --root, which the
 * message previously left out.
 */
static char *diff_tree_usage =
"git-diff-tree [-p] [-r] [-z] [--stdin] [-M] [-C] [--find-copies-harder] [-B] [-R] [-S<string>] [--pickaxe-all] [-O<orderfile>] [-m] [-s] [-v] [--pretty] [-t] [--root] <tree-ish> <tree-ish>";
403
404 static enum cmit_fmt get_commit_format(const char *arg)
405 {
406         if (!*arg)
407                 return CMIT_FMT_DEFAULT;
408         if (!strcmp(arg, "=raw"))
409                 return CMIT_FMT_RAW;
410         if (!strcmp(arg, "=medium"))
411                 return CMIT_FMT_MEDIUM;
412         if (!strcmp(arg, "=short"))
413                 return CMIT_FMT_SHORT;
414         usage(diff_tree_usage);
415 }
416
417 int main(int argc, const char **argv)
418 {
419         int nr_sha1;
420         char line[1000];
421         unsigned char sha1[2][20];
422
423         nr_sha1 = 0;
424         for (;;) {
425                 const char *arg;
426
427                 argv++;
428                 argc--;
429                 arg = *argv;
430                 if (!arg)
431                         break;
432
433                 if (*arg != '-') {
434                         if (nr_sha1 < 2 && !get_sha1(arg, sha1[nr_sha1])) {
435                                 nr_sha1++;
436                                 continue;
437                         }
438                         break;
439                 }
440
441                 if (!strcmp(arg, "--")) {
442                         argv++;
443                         argc--;
444                         break;
445                 }
446                 if (!strcmp(arg, "-r")) {
447                         recursive = 1;
448                         continue;
449                 }
450                 if (!strcmp(arg, "-t")) {
451                         recursive = show_tree_entry_in_recursive = 1;
452                         continue;
453                 }
454                 if (!strcmp(arg, "-R")) {
455                         diff_setup_opt |= DIFF_SETUP_REVERSE;
456                         continue;
457                 }
458                 if (!strcmp(arg, "-p")) {
459                         diff_output_format = DIFF_FORMAT_PATCH;
460                         recursive = 1;
461                         continue;
462                 }
463                 if (!strncmp(arg, "-S", 2)) {
464                         pickaxe = arg + 2;
465                         continue;
466                 }
467                 if (!strncmp(arg, "-O", 2)) {
468                         orderfile = arg + 2;
469                         continue;
470                 }
471                 if (!strcmp(arg, "--pickaxe-all")) {
472                         pickaxe_opts = DIFF_PICKAXE_ALL;
473                         continue;
474                 }
475                 if (!strncmp(arg, "-M", 2)) {
476                         detect_rename = DIFF_DETECT_RENAME;
477                         if ((diff_score_opt = diff_scoreopt_parse(arg)) == -1)
478                                 usage(diff_tree_usage);
479                         continue;
480                 }
481                 if (!strncmp(arg, "-C", 2)) {
482                         detect_rename = DIFF_DETECT_COPY;
483                         if ((diff_score_opt = diff_scoreopt_parse(arg)) == -1)
484                                 usage(diff_tree_usage);
485                         continue;
486                 }
487                 if (!strncmp(arg, "-B", 2)) {
488                         if ((diff_break_opt = diff_scoreopt_parse(arg)) == -1)
489                                 usage(diff_tree_usage);
490                         continue;
491                 }
492                 if (!strcmp(arg, "--find-copies-harder")) {
493                         find_copies_harder = 1;
494                         continue;
495                 }
496                 if (!strcmp(arg, "-z")) {
497                         diff_output_format = DIFF_FORMAT_MACHINE;
498                         continue;
499                 }
500                 if (!strcmp(arg, "-m")) {
501                         ignore_merges = 0;
502                         continue;
503                 }
504                 if (!strcmp(arg, "-s")) {
505                         diff_output_format = DIFF_FORMAT_NO_OUTPUT;
506                         continue;
507                 }
508                 if (!strcmp(arg, "-v")) {
509                         verbose_header = 1;
510                         header_prefix = "diff-tree ";
511                         continue;
512                 }
513                 if (!strncmp(arg, "--pretty", 8)) {
514                         verbose_header = 1;
515                         header_prefix = "diff-tree ";
516                         commit_format = get_commit_format(arg+8);
517                         continue;
518                 }
519                 if (!strcmp(arg, "--stdin")) {
520                         read_stdin = 1;
521                         continue;
522                 }
523                 if (!strcmp(arg, "--root")) {
524                         show_root_diff = 1;
525                         continue;
526                 }
527                 usage(diff_tree_usage);
528         }
529         if (find_copies_harder && detect_rename != DIFF_DETECT_COPY)
530                 usage(diff_tree_usage);
531
532         if (argc > 0) {
533                 int i;
534
535                 paths = argv;
536                 nr_paths = argc;
537                 pathlens = xmalloc(nr_paths * sizeof(int));
538                 for (i=0; i<nr_paths; i++)
539                         pathlens[i] = strlen(paths[i]);
540         }
541
542         switch (nr_sha1) {
543         case 0:
544                 if (!read_stdin)
545                         usage(diff_tree_usage);
546                 break;
547         case 1:
548                 diff_tree_commit(sha1[0], NULL);
549                 break;
550         case 2:
551                 diff_tree_sha1_top(sha1[0], sha1[1], "");
552                 break;
553         }
554
555         if (!read_stdin)
556                 return 0;
557
558         if (detect_rename)
559                 diff_setup_opt |= (DIFF_SETUP_USE_SIZE_CACHE |
560                                    DIFF_SETUP_USE_CACHE);
561         while (fgets(line, sizeof(line), stdin))
562                 diff_tree_stdin(line);
563
564         return 0;
565 }