Allow hierarchical section names
[git.git] / diff.c
1 /*
2  * Copyright (C) 2005 Junio C Hamano
3  */
4 #include <sys/types.h>
5 #include <sys/wait.h>
6 #include <signal.h>
7 #include "cache.h"
8 #include "quote.h"
9 #include "diff.h"
10 #include "diffcore.h"
11
/*
 * Value diff_setup_done() copies into options->rename_limit when rename
 * detection is enabled and the caller left the limit negative.
 */
int diff_rename_limit_default = -1;

/*
 * Turned on by diff_setup_done() when DIFF_SETUP_USE_SIZE_CACHE is set;
 * while it is zero diff_populate_filespec() ignores its size_only request.
 */
static int use_size_cache = 0;

/*
 * Options placed on the diff command line assembled by builtin_diff().
 * external_diff() replaces this with $GIT_DIFF_OPTS when that is set.
 */
static const char *diff_opts = "-pu";
17
/*
 * Return a newly allocated string for str: the output of
 * quote_c_style() when its sizing pass reports a non-zero length,
 * otherwise a plain strdup() copy.  A NULL str yields NULL.
 * The caller frees the result.
 */
static char *quote_one(const char *str)
{
        char *quoted;
        int quoted_len;

        if (str == NULL)
                return NULL;

        /* First pass only measures; no output buffer is supplied. */
        quoted_len = quote_c_style(str, NULL, NULL, 0);
        if (quoted_len == 0)
                return strdup(str);

        quoted = xmalloc(quoted_len + 1);
        quote_c_style(str, quoted, NULL, 0);
        return quoted;
}
32
/*
 * Concatenate one and two into a newly allocated string.
 *
 * When quote_c_style() reports a non-zero length for either part, the
 * result is a single double-quoted string holding both parts as written
 * by quote_c_style().  Otherwise the two parts are joined verbatim.
 * The caller owns the returned buffer.
 */
static char *quote_two(const char *one, const char *two)
{
        int need_one = quote_c_style(one, NULL, NULL, 1);
        int need_two = quote_c_style(two, NULL, NULL, 1);
        char *xp;

        if (need_one + need_two) {
                /*
                 * A part with a zero reported length still takes up its
                 * plain length in the output.  Each length must go into
                 * its own variable: the offsets below depend on both.
                 */
                if (!need_one)
                        need_one = strlen(one);
                if (!need_two)
                        need_two = strlen(two);

                /* opening quote + both parts + closing quote + NUL */
                xp = xmalloc(need_one + need_two + 3);
                xp[0] = '"';
                quote_c_style(one, xp + 1, NULL, 1);
                quote_c_style(two, xp + need_one + 1, NULL, 1);
                strcpy(xp + need_one + need_two + 1, "\"");
                return xp;
        }
        need_one = strlen(one);
        need_two = strlen(two);
        xp = xmalloc(need_one + need_two + 1);
        strcpy(xp, one);
        strcpy(xp + need_one, two);
        return xp;
}
57
58 static const char *external_diff(void)
59 {
60         static const char *external_diff_cmd = NULL;
61         static int done_preparing = 0;
62         const char *env_diff_opts;
63
64         if (done_preparing)
65                 return external_diff_cmd;
66
67         /*
68          * Default values above are meant to match the
69          * Linux kernel development style.  Examples of
70          * alternative styles you can specify via environment
71          * variables are:
72          *
73          * GIT_DIFF_OPTS="-c";
74          */
75         external_diff_cmd = getenv("GIT_EXTERNAL_DIFF");
76
77         /* In case external diff fails... */
78         env_diff_opts = getenv("GIT_DIFF_OPTS");
79         if (env_diff_opts) diff_opts = env_diff_opts;
80
81         done_preparing = 1;
82         return external_diff_cmd;
83 }
84
/* Size of the buffer that receives a generated temporary file name. */
#define TEMPFILE_PATH_LEN               50

/*
 * Describes one side of a comparison as it is handed to the diff
 * program.
 */
struct diff_tempfile {
        const char *name;       /* file the diff should read; points at
                                 * tmp_path when a temporary file was made */
        char hex[41];           /* 40 hex digits plus NUL, or "." */
        char mode[10];          /* "%06o" rendering of the mode, or "." */
        char tmp_path[TEMPFILE_PATH_LEN];
};

/* Slot 0 is the old side, slot 1 the new side. */
static struct diff_tempfile diff_temp[2];
93
/*
 * Count the lines in filename.
 *
 * An empty file has 0 lines.  A final line that lacks a trailing
 * newline still counts as a line.  If the file cannot be opened the
 * problem is reported on stderr and the process exits with status 1.
 */
static int count_lines(const char *filename)
{
        FILE *in;
        int count = 0, ch, completely_empty = 1, nl_just_seen = 0;

        in = fopen(filename, "r");
        if (!in) {
                fprintf(stderr, "unable to open %s: %s\n",
                        filename, strerror(errno));
                exit(1);
        }
        while ((ch = fgetc(in)) != EOF) {
                completely_empty = 0;
                nl_just_seen = (ch == '\n');
                if (nl_just_seen)
                        count++;
        }
        fclose(in);
        if (completely_empty)
                return 0;
        if (!nl_just_seen)
                count++; /* no trailing newline */
        return count;
}
117
/*
 * Write the range part of a hunk header for a file of count lines
 * that starts at line 1: "0,0" for no lines, "1" for a single line,
 * and "1,<count>" for anything else.
 */
static void print_line_count(int count)
{
        if (count == 0)
                printf("0,0");
        else if (count == 1)
                printf("1");
        else
                printf("1,%d", count);
}
132
/*
 * Copy filename to stdout, writing the character prefix at the start
 * of every line.  When the file does not end in a newline, one is
 * added, followed by the "\ No newline at end of file" marker line.
 * If the file cannot be opened the problem is reported on stderr and
 * the process exits with status 1.
 */
static void copy_file(int prefix, const char *filename)
{
        FILE *in;
        int ch, nl_just_seen = 1;

        in = fopen(filename, "r");
        if (!in) {
                fprintf(stderr, "unable to open %s: %s\n",
                        filename, strerror(errno));
                exit(1);
        }
        while ((ch = fgetc(in)) != EOF) {
                /* nl_just_seen starts at 1 so the first line gets a prefix too */
                if (nl_just_seen)
                        putchar(prefix);
                putchar(ch);
                nl_just_seen = (ch == '\n');
        }
        fclose(in);
        if (!nl_just_seen)
                printf("\n\\ No newline at end of file\n");
}
151
152 static void emit_rewrite_diff(const char *name_a,
153                               const char *name_b,
154                               struct diff_tempfile *temp)
155 {
156         /* Use temp[i].name as input, name_a and name_b as labels */
157         int lc_a, lc_b;
158         lc_a = count_lines(temp[0].name);
159         lc_b = count_lines(temp[1].name);
160         printf("--- %s\n+++ %s\n@@ -", name_a, name_b);
161         print_line_count(lc_a);
162         printf(" +");
163         print_line_count(lc_b);
164         printf(" @@\n");
165         if (lc_a)
166                 copy_file('-', temp[0].name);
167         if (lc_b)
168                 copy_file('+', temp[1].name);
169 }
170
171 static void builtin_diff(const char *name_a,
172                          const char *name_b,
173                          struct diff_tempfile *temp,
174                          const char *xfrm_msg,
175                          int complete_rewrite)
176 {
177         int i, next_at, cmd_size;
178         const char *const diff_cmd = "diff -L%s -L%s";
179         const char *const diff_arg  = "-- %s %s||:"; /* "||:" is to return 0 */
180         const char *input_name_sq[2];
181         const char *label_path[2];
182         char *cmd;
183
184         /* diff_cmd and diff_arg have 4 %s in total which makes
185          * the sum of these strings 8 bytes larger than required.
186          * we use 2 spaces around diff-opts, and we need to count
187          * terminating NUL; we used to subtract 5 here, but we do not
188          * care about small leaks in this subprocess that is about
189          * to exec "diff" anymore.
190          */
191         cmd_size = (strlen(diff_cmd) + strlen(diff_opts) + strlen(diff_arg)
192                     + 128);
193
194         for (i = 0; i < 2; i++) {
195                 input_name_sq[i] = sq_quote(temp[i].name);
196                 if (!strcmp(temp[i].name, "/dev/null"))
197                         label_path[i] = "/dev/null";
198                 else if (!i)
199                         label_path[i] = sq_quote(quote_two("a/", name_a));
200                 else
201                         label_path[i] = sq_quote(quote_two("b/", name_b));
202                 cmd_size += (strlen(label_path[i]) + strlen(input_name_sq[i]));
203         }
204
205         cmd = xmalloc(cmd_size);
206
207         next_at = 0;
208         next_at += snprintf(cmd+next_at, cmd_size-next_at,
209                             diff_cmd, label_path[0], label_path[1]);
210         next_at += snprintf(cmd+next_at, cmd_size-next_at,
211                             " %s ", diff_opts);
212         next_at += snprintf(cmd+next_at, cmd_size-next_at,
213                             diff_arg, input_name_sq[0], input_name_sq[1]);
214
215         printf("diff --git %s %s\n",
216                quote_two("a/", name_a), quote_two("b/", name_b));
217         if (label_path[0][0] == '/') {
218                 /* dev/null */
219                 printf("new file mode %s\n", temp[1].mode);
220                 if (xfrm_msg && xfrm_msg[0])
221                         puts(xfrm_msg);
222         }
223         else if (label_path[1][0] == '/') {
224                 printf("deleted file mode %s\n", temp[0].mode);
225                 if (xfrm_msg && xfrm_msg[0])
226                         puts(xfrm_msg);
227         }
228         else {
229                 if (strcmp(temp[0].mode, temp[1].mode)) {
230                         printf("old mode %s\n", temp[0].mode);
231                         printf("new mode %s\n", temp[1].mode);
232                 }
233                 if (xfrm_msg && xfrm_msg[0])
234                         puts(xfrm_msg);
235                 if (strncmp(temp[0].mode, temp[1].mode, 3))
236                         /* we do not run diff between different kind
237                          * of objects.
238                          */
239                         exit(0);
240                 if (complete_rewrite) {
241                         fflush(NULL);
242                         emit_rewrite_diff(name_a, name_b, temp);
243                         exit(0);
244                 }
245         }
246         fflush(NULL);
247         execlp("/bin/sh","sh", "-c", cmd, NULL);
248 }
249
250 struct diff_filespec *alloc_filespec(const char *path)
251 {
252         int namelen = strlen(path);
253         struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1);
254
255         memset(spec, 0, sizeof(*spec));
256         spec->path = (char *)(spec + 1);
257         memcpy(spec->path, path, namelen+1);
258         return spec;
259 }
260
261 void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
262                    unsigned short mode)
263 {
264         if (mode) {
265                 spec->mode = DIFF_FILE_CANON_MODE(mode);
266                 memcpy(spec->sha1, sha1, 20);
267                 spec->sha1_valid = !!memcmp(sha1, null_sha1, 20);
268         }
269 }
270
271 /*
272  * Given a name and sha1 pair, if the dircache tells us the file in
273  * the work tree has that object contents, return true, so that
274  * prepare_temp_file() does not have to inflate and extract.
275  */
276 static int work_tree_matches(const char *name, const unsigned char *sha1)
277 {
278         struct cache_entry *ce;
279         struct stat st;
280         int pos, len;
281
282         /* We do not read the cache ourselves here, because the
283          * benchmark with my previous version that always reads cache
284          * shows that it makes things worse for diff-tree comparing
285          * two linux-2.6 kernel trees in an already checked out work
286          * tree.  This is because most diff-tree comparisons deal with
287          * only a small number of files, while reading the cache is
288          * expensive for a large project, and its cost outweighs the
289          * savings we get by not inflating the object to a temporary
290          * file.  Practically, this code only helps when we are used
291          * by diff-cache --cached, which does read the cache before
292          * calling us.
293          */
294         if (!active_cache)
295                 return 0;
296
297         len = strlen(name);
298         pos = cache_name_pos(name, len);
299         if (pos < 0)
300                 return 0;
301         ce = active_cache[pos];
302         if ((lstat(name, &st) < 0) ||
303             !S_ISREG(st.st_mode) || /* careful! */
304             ce_match_stat(ce, &st) ||
305             memcmp(sha1, ce->sha1, 20))
306                 return 0;
307         /* we return 1 only when we can stat, it is a regular file,
308          * stat information matches, and sha1 recorded in the cache
309          * matches.  I.e. we know the file in the work tree really is
310          * the same as the <name, sha1> pair.
311          */
312         return 1;
313 }
314
/* One remembered object size, keyed by the object's sha1. */
struct sha1_size_cache {
        unsigned char sha1[20];
        unsigned long size;
};

/*
 * Table of pointers to remembered sizes, searched and extended by
 * locate_size_cache(): _nr entries are in use, _alloc slots exist.
 */
static struct sha1_size_cache **sha1_size_cache;
static int sha1_size_cache_nr;
static int sha1_size_cache_alloc;
320
321 static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
322                                                  int find_only,
323                                                  unsigned long size)
324 {
325         int first, last;
326         struct sha1_size_cache *e;
327
328         first = 0;
329         last = sha1_size_cache_nr;
330         while (last > first) {
331                 int cmp, next = (last + first) >> 1;
332                 e = sha1_size_cache[next];
333                 cmp = memcmp(e->sha1, sha1, 20);
334                 if (!cmp)
335                         return e;
336                 if (cmp < 0) {
337                         last = next;
338                         continue;
339                 }
340                 first = next+1;
341         }
342         /* not found */
343         if (find_only)
344                 return NULL;
345         /* insert to make it at "first" */
346         if (sha1_size_cache_alloc <= sha1_size_cache_nr) {
347                 sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc);
348                 sha1_size_cache = xrealloc(sha1_size_cache,
349                                            sha1_size_cache_alloc *
350                                            sizeof(*sha1_size_cache));
351         }
352         sha1_size_cache_nr++;
353         if (first < sha1_size_cache_nr)
354                 memmove(sha1_size_cache + first + 1, sha1_size_cache + first,
355                         (sha1_size_cache_nr - first - 1) *
356                         sizeof(*sha1_size_cache));
357         e = xmalloc(sizeof(struct sha1_size_cache));
358         sha1_size_cache[first] = e;
359         memcpy(e->sha1, sha1, 20);
360         e->size = size;
361         return e;
362 }
363
364 /*
365  * While doing rename detection and pickaxe operation, we may need to
366  * grab the data for the blob (or file) for our own in-core comparison.
367  * diff_filespec has data and size fields for this purpose.
368  */
369 int diff_populate_filespec(struct diff_filespec *s, int size_only)
370 {
371         int err = 0;
372         if (!DIFF_FILE_VALID(s))
373                 die("internal error: asking to populate invalid file.");
374         if (S_ISDIR(s->mode))
375                 return -1;
376
377         if (!use_size_cache)
378                 size_only = 0;
379
380         if (s->data)
381                 return err;
382         if (!s->sha1_valid ||
383             work_tree_matches(s->path, s->sha1)) {
384                 struct stat st;
385                 int fd;
386                 if (lstat(s->path, &st) < 0) {
387                         if (errno == ENOENT) {
388                         err_empty:
389                                 err = -1;
390                         empty:
391                                 s->data = "";
392                                 s->size = 0;
393                                 return err;
394                         }
395                 }
396                 s->size = st.st_size;
397                 if (!s->size)
398                         goto empty;
399                 if (size_only)
400                         return 0;
401                 if (S_ISLNK(st.st_mode)) {
402                         int ret;
403                         s->data = xmalloc(s->size);
404                         s->should_free = 1;
405                         ret = readlink(s->path, s->data, s->size);
406                         if (ret < 0) {
407                                 free(s->data);
408                                 goto err_empty;
409                         }
410                         return 0;
411                 }
412                 fd = open(s->path, O_RDONLY);
413                 if (fd < 0)
414                         goto err_empty;
415                 s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
416                 close(fd);
417                 if (s->data == MAP_FAILED)
418                         goto err_empty;
419                 s->should_munmap = 1;
420         }
421         else {
422                 char type[20];
423                 struct sha1_size_cache *e;
424
425                 if (size_only) {
426                         e = locate_size_cache(s->sha1, 1, 0);
427                         if (e) {
428                                 s->size = e->size;
429                                 return 0;
430                         }
431                         if (!sha1_object_info(s->sha1, type, &s->size))
432                                 locate_size_cache(s->sha1, 0, s->size);
433                 }
434                 else {
435                         s->data = read_sha1_file(s->sha1, type, &s->size);
436                         s->should_free = 1;
437                 }
438         }
439         return 0;
440 }
441
442 void diff_free_filespec_data(struct diff_filespec *s)
443 {
444         if (s->should_free)
445                 free(s->data);
446         else if (s->should_munmap)
447                 munmap(s->data, s->size);
448         s->should_free = s->should_munmap = 0;
449         s->data = NULL;
450 }
451
452 static void prep_temp_blob(struct diff_tempfile *temp,
453                            void *blob,
454                            unsigned long size,
455                            const unsigned char *sha1,
456                            int mode)
457 {
458         int fd;
459
460         fd = git_mkstemp(temp->tmp_path, TEMPFILE_PATH_LEN, ".diff_XXXXXX");
461         if (fd < 0)
462                 die("unable to create temp-file");
463         if (write(fd, blob, size) != size)
464                 die("unable to write temp-file");
465         close(fd);
466         temp->name = temp->tmp_path;
467         strcpy(temp->hex, sha1_to_hex(sha1));
468         temp->hex[40] = 0;
469         sprintf(temp->mode, "%06o", mode);
470 }
471
472 static void prepare_temp_file(const char *name,
473                               struct diff_tempfile *temp,
474                               struct diff_filespec *one)
475 {
476         if (!DIFF_FILE_VALID(one)) {
477         not_a_valid_file:
478                 /* A '-' entry produces this for file-2, and
479                  * a '+' entry produces this for file-1.
480                  */
481                 temp->name = "/dev/null";
482                 strcpy(temp->hex, ".");
483                 strcpy(temp->mode, ".");
484                 return;
485         }
486
487         if (!one->sha1_valid ||
488             work_tree_matches(name, one->sha1)) {
489                 struct stat st;
490                 if (lstat(name, &st) < 0) {
491                         if (errno == ENOENT)
492                                 goto not_a_valid_file;
493                         die("stat(%s): %s", name, strerror(errno));
494                 }
495                 if (S_ISLNK(st.st_mode)) {
496                         int ret;
497                         char *buf, buf_[1024];
498                         buf = ((sizeof(buf_) < st.st_size) ?
499                                xmalloc(st.st_size) : buf_);
500                         ret = readlink(name, buf, st.st_size);
501                         if (ret < 0)
502                                 die("readlink(%s)", name);
503                         prep_temp_blob(temp, buf, st.st_size,
504                                        (one->sha1_valid ?
505                                         one->sha1 : null_sha1),
506                                        (one->sha1_valid ?
507                                         one->mode : S_IFLNK));
508                 }
509                 else {
510                         /* we can borrow from the file in the work tree */
511                         temp->name = name;
512                         if (!one->sha1_valid)
513                                 strcpy(temp->hex, sha1_to_hex(null_sha1));
514                         else
515                                 strcpy(temp->hex, sha1_to_hex(one->sha1));
516                         /* Even though we may sometimes borrow the
517                          * contents from the work tree, we always want
518                          * one->mode.  mode is trustworthy even when
519                          * !(one->sha1_valid), as long as
520                          * DIFF_FILE_VALID(one).
521                          */
522                         sprintf(temp->mode, "%06o", one->mode);
523                 }
524                 return;
525         }
526         else {
527                 if (diff_populate_filespec(one, 0))
528                         die("cannot read data blob for %s", one->path);
529                 prep_temp_blob(temp, one->data, one->size,
530                                one->sha1, one->mode);
531         }
532 }
533
534 static void remove_tempfile(void)
535 {
536         int i;
537
538         for (i = 0; i < 2; i++)
539                 if (diff_temp[i].name == diff_temp[i].tmp_path) {
540                         unlink(diff_temp[i].name);
541                         diff_temp[i].name = NULL;
542                 }
543 }
544
/*
 * Signal handler: remove our temporary files, then let the signal
 * take its default action.  Returning normally from the handler would
 * let the process carry on as if it had never been interrupted, so
 * the default disposition is restored and the signal is raised again.
 */
static void remove_tempfile_on_signal(int signo)
{
        remove_tempfile();
        signal(signo, SIG_DFL);
        raise(signo);
}
549
550 /* An external diff command takes:
551  *
552  * diff-cmd name infile1 infile1-sha1 infile1-mode \
553  *               infile2 infile2-sha1 infile2-mode [ rename-to ]
554  *
555  */
556 static void run_external_diff(const char *pgm,
557                               const char *name,
558                               const char *other,
559                               struct diff_filespec *one,
560                               struct diff_filespec *two,
561                               const char *xfrm_msg,
562                               int complete_rewrite)
563 {
564         struct diff_tempfile *temp = diff_temp;
565         pid_t pid;
566         int status;
567         static int atexit_asked = 0;
568         const char *othername;
569
570         othername = (other? other : name);
571         if (one && two) {
572                 prepare_temp_file(name, &temp[0], one);
573                 prepare_temp_file(othername, &temp[1], two);
574                 if (! atexit_asked &&
575                     (temp[0].name == temp[0].tmp_path ||
576                      temp[1].name == temp[1].tmp_path)) {
577                         atexit_asked = 1;
578                         atexit(remove_tempfile);
579                 }
580                 signal(SIGINT, remove_tempfile_on_signal);
581         }
582
583         fflush(NULL);
584         pid = fork();
585         if (pid < 0)
586                 die("unable to fork");
587         if (!pid) {
588                 if (pgm) {
589                         if (one && two) {
590                                 const char *exec_arg[10];
591                                 const char **arg = &exec_arg[0];
592                                 *arg++ = pgm;
593                                 *arg++ = name;
594                                 *arg++ = temp[0].name;
595                                 *arg++ = temp[0].hex;
596                                 *arg++ = temp[0].mode;
597                                 *arg++ = temp[1].name;
598                                 *arg++ = temp[1].hex;
599                                 *arg++ = temp[1].mode;
600                                 if (other) {
601                                         *arg++ = other;
602                                         *arg++ = xfrm_msg;
603                                 }
604                                 *arg = NULL;
605                                 execvp(pgm, (char *const*) exec_arg);
606                         }
607                         else
608                                 execlp(pgm, pgm, name, NULL);
609                 }
610                 /*
611                  * otherwise we use the built-in one.
612                  */
613                 if (one && two)
614                         builtin_diff(name, othername, temp, xfrm_msg,
615                                      complete_rewrite);
616                 else
617                         printf("* Unmerged path %s\n", name);
618                 exit(0);
619         }
620         if (waitpid(pid, &status, 0) < 0 ||
621             !WIFEXITED(status) || WEXITSTATUS(status)) {
622                 /* Earlier we did not check the exit status because
623                  * diff exits non-zero if files are different, and
624                  * we are not interested in knowing that.  It was a
625                  * mistake which made it harder to quit a diff-*
626                  * session that uses the git-apply-patch-script as
627                  * the GIT_EXTERNAL_DIFF.  A custom GIT_EXTERNAL_DIFF
628                  * should also exit non-zero only when it wants to
629                  * abort the entire diff-* session.
630                  */
631                 remove_tempfile();
632                 fprintf(stderr, "external diff died, stopping at %s.\n", name);
633                 exit(1);
634         }
635         remove_tempfile();
636 }
637
638 static void diff_fill_sha1_info(struct diff_filespec *one)
639 {
640         if (DIFF_FILE_VALID(one)) {
641                 if (!one->sha1_valid) {
642                         struct stat st;
643                         if (stat(one->path, &st) < 0)
644                                 die("stat %s", one->path);
645                         if (index_path(one->sha1, one->path, &st, 0))
646                                 die("cannot hash %s\n", one->path);
647                 }
648         }
649         else
650                 memset(one->sha1, 0, 20);
651 }
652
653 static void run_diff(struct diff_filepair *p, struct diff_options *o)
654 {
655         const char *pgm = external_diff();
656         char msg[PATH_MAX*2+300], *xfrm_msg;
657         struct diff_filespec *one;
658         struct diff_filespec *two;
659         const char *name;
660         const char *other;
661         char *name_munged, *other_munged;
662         int complete_rewrite = 0;
663         int len;
664
665         if (DIFF_PAIR_UNMERGED(p)) {
666                 /* unmerged */
667                 run_external_diff(pgm, p->one->path, NULL, NULL, NULL, NULL,
668                                   0);
669                 return;
670         }
671
672         name = p->one->path;
673         other = (strcmp(name, p->two->path) ? p->two->path : NULL);
674         name_munged = quote_one(name);
675         other_munged = quote_one(other);
676         one = p->one; two = p->two;
677
678         diff_fill_sha1_info(one);
679         diff_fill_sha1_info(two);
680
681         len = 0;
682         switch (p->status) {
683         case DIFF_STATUS_COPIED:
684                 len += snprintf(msg + len, sizeof(msg) - len,
685                                 "similarity index %d%%\n"
686                                 "copy from %s\n"
687                                 "copy to %s\n",
688                                 (int)(0.5 + p->score * 100.0/MAX_SCORE),
689                                 name_munged, other_munged);
690                 break;
691         case DIFF_STATUS_RENAMED:
692                 len += snprintf(msg + len, sizeof(msg) - len,
693                                 "similarity index %d%%\n"
694                                 "rename from %s\n"
695                                 "rename to %s\n",
696                                 (int)(0.5 + p->score * 100.0/MAX_SCORE),
697                                 name_munged, other_munged);
698                 break;
699         case DIFF_STATUS_MODIFIED:
700                 if (p->score) {
701                         len += snprintf(msg + len, sizeof(msg) - len,
702                                         "dissimilarity index %d%%\n",
703                                         (int)(0.5 + p->score *
704                                               100.0/MAX_SCORE));
705                         complete_rewrite = 1;
706                         break;
707                 }
708                 /* fallthru */
709         default:
710                 /* nothing */
711                 ;
712         }
713
714         if (memcmp(one->sha1, two->sha1, 20)) {
715                 char one_sha1[41];
716                 const char *index_fmt = o->full_index ? "index %s..%s" : "index %.7s..%.7s";
717                 memcpy(one_sha1, sha1_to_hex(one->sha1), 41);
718
719                 len += snprintf(msg + len, sizeof(msg) - len,
720                                 index_fmt, one_sha1, sha1_to_hex(two->sha1));
721                 if (one->mode == two->mode)
722                         len += snprintf(msg + len, sizeof(msg) - len,
723                                         " %06o", one->mode);
724                 len += snprintf(msg + len, sizeof(msg) - len, "\n");
725         }
726
727         if (len)
728                 msg[--len] = 0;
729         xfrm_msg = len ? msg : NULL;
730
731         if (!pgm &&
732             DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) &&
733             (S_IFMT & one->mode) != (S_IFMT & two->mode)) {
734                 /* a filepair that changes between file and symlink
735                  * needs to be split into deletion and creation.
736                  */
737                 struct diff_filespec *null = alloc_filespec(two->path);
738                 run_external_diff(NULL, name, other, one, null, xfrm_msg, 0);
739                 free(null);
740                 null = alloc_filespec(one->path);
741                 run_external_diff(NULL, name, other, null, two, xfrm_msg, 0);
742                 free(null);
743         }
744         else
745                 run_external_diff(pgm, name, other, one, two, xfrm_msg,
746                                   complete_rewrite);
747
748         free(name_munged);
749         free(other_munged);
750 }
751
752 void diff_setup(struct diff_options *options)
753 {
754         memset(options, 0, sizeof(*options));
755         options->output_format = DIFF_FORMAT_RAW;
756         options->line_termination = '\n';
757         options->break_opt = -1;
758         options->rename_limit = -1;
759
760         options->change = diff_change;
761         options->add_remove = diff_addremove;
762 }
763
764 int diff_setup_done(struct diff_options *options)
765 {
766         if ((options->find_copies_harder &&
767              options->detect_rename != DIFF_DETECT_COPY) ||
768             (0 <= options->rename_limit && !options->detect_rename))
769                 return -1;
770         if (options->detect_rename && options->rename_limit < 0)
771                 options->rename_limit = diff_rename_limit_default;
772         if (options->setup & DIFF_SETUP_USE_CACHE) {
773                 if (!active_cache)
774                         /* read-cache does not die even when it fails
775                          * so it is safe for us to do this here.  Also
776                          * it does not smudge active_cache or active_nr
777                          * when it fails, so we do not have to worry about
778                          * cleaning it up oufselves either.
779                          */
780                         read_cache();
781         }
782         if (options->setup & DIFF_SETUP_USE_SIZE_CACHE)
783                 use_size_cache = 1;
784
785         return 0;
786 }
787
788 int diff_opt_parse(struct diff_options *options, const char **av, int ac)
789 {
790         const char *arg = av[0];
791         if (!strcmp(arg, "-p") || !strcmp(arg, "-u"))
792                 options->output_format = DIFF_FORMAT_PATCH;
793         else if (!strcmp(arg, "-z"))
794                 options->line_termination = 0;
795         else if (!strncmp(arg, "-l", 2))
796                 options->rename_limit = strtoul(arg+2, NULL, 10);
797         else if (!strcmp(arg, "--full-index"))
798                 options->full_index = 1;
799         else if (!strcmp(arg, "--name-only"))
800                 options->output_format = DIFF_FORMAT_NAME;
801         else if (!strcmp(arg, "--name-status"))
802                 options->output_format = DIFF_FORMAT_NAME_STATUS;
803         else if (!strcmp(arg, "-R"))
804                 options->reverse_diff = 1;
805         else if (!strncmp(arg, "-S", 2))
806                 options->pickaxe = arg + 2;
807         else if (!strcmp(arg, "-s"))
808                 options->output_format = DIFF_FORMAT_NO_OUTPUT;
809         else if (!strncmp(arg, "-O", 2))
810                 options->orderfile = arg + 2;
811         else if (!strncmp(arg, "--diff-filter=", 14))
812                 options->filter = arg + 14;
813         else if (!strcmp(arg, "--pickaxe-all"))
814                 options->pickaxe_opts = DIFF_PICKAXE_ALL;
815         else if (!strncmp(arg, "-B", 2)) {
816                 if ((options->break_opt =
817                      diff_scoreopt_parse(arg)) == -1)
818                         return -1;
819         }
820         else if (!strncmp(arg, "-M", 2)) {
821                 if ((options->rename_score =
822                      diff_scoreopt_parse(arg)) == -1)
823                         return -1;
824                 options->detect_rename = DIFF_DETECT_RENAME;
825         }
826         else if (!strncmp(arg, "-C", 2)) {
827                 if ((options->rename_score =
828                      diff_scoreopt_parse(arg)) == -1)
829                         return -1;
830                 options->detect_rename = DIFF_DETECT_COPY;
831         }
832         else if (!strcmp(arg, "--find-copies-harder"))
833                 options->find_copies_harder = 1;
834         else
835                 return 0;
836         return 1;
837 }
838
839 static int parse_num(const char **cp_p)
840 {
841         int num, scale, ch, cnt;
842         const char *cp = *cp_p;
843
844         cnt = num = 0;
845         scale = 1;
846         while ('0' <= (ch = *cp) && ch <= '9') {
847                 if (cnt++ < 5) {
848                         /* We simply ignore more than 5 digits precision. */
849                         scale *= 10;
850                         num = num * 10 + ch - '0';
851                 }
852                 cp++;
853         }
854         *cp_p = cp;
855
856         /* special case: -M100 would mean 1.0 not 0.1 */
857         if (num == 100 && scale == 1000)
858                 return MAX_SCORE;
859
860         /* user says num divided by scale and we say internally that
861          * is MAX_SCORE * num / scale.
862          */
863         return (MAX_SCORE * num / scale);
864 }
865
/*
 * Parse a complete "-M<num>", "-C<num>", "-B<num>" or "-B<num>/<num>"
 * option string.
 *
 * Returns the first score in the low bits with the second score
 * (only ever non-zero for -B) shifted left by 16, or -1 when the
 * string is not one of the forms above.
 */
int diff_scoreopt_parse(const char *opt)
{
        int kind;
        int first;
        int second = 0;

        if (opt[0] != '-')
                return -1;

        kind = opt[1];
        switch (kind) {
        case 'M':
        case 'C':
        case 'B':
                break;
        default:
                return -1; /* that is not a -M, -C nor -B option */
        }
        opt += 2;

        first = parse_num(&opt);

        /* only -B accepts a second, slash-separated score */
        if (kind == 'B' && *opt == '/') {
                opt++;
                second = parse_num(&opt);
        }

        /* anything left over means a malformed option */
        if (*opt != 0)
                return -1;

        return first | (second << 16);
}
893
894 struct diff_queue_struct diff_queued_diff;
895
896 void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp)
897 {
898         if (queue->alloc <= queue->nr) {
899                 queue->alloc = alloc_nr(queue->alloc);
900                 queue->queue = xrealloc(queue->queue,
901                                         sizeof(dp) * queue->alloc);
902         }
903         queue->queue[queue->nr++] = dp;
904 }
905
906 struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
907                                  struct diff_filespec *one,
908                                  struct diff_filespec *two)
909 {
910         struct diff_filepair *dp = xmalloc(sizeof(*dp));
911         dp->one = one;
912         dp->two = two;
913         dp->score = 0;
914         dp->status = 0;
915         dp->source_stays = 0;
916         dp->broken_pair = 0;
917         if (queue)
918                 diff_q(queue, dp);
919         return dp;
920 }
921
922 void diff_free_filepair(struct diff_filepair *p)
923 {
924         diff_free_filespec_data(p->one);
925         diff_free_filespec_data(p->two);
926         free(p->one);
927         free(p->two);
928         free(p);
929 }
930
931 static void diff_flush_raw(struct diff_filepair *p,
932                            int line_termination,
933                            int inter_name_termination,
934                            int output_format)
935 {
936         int two_paths;
937         char status[10];
938         const char *path_one, *path_two;
939
940         path_one = p->one->path;
941         path_two = p->two->path;
942         if (line_termination) {
943                 path_one = quote_one(path_one);
944                 path_two = quote_one(path_two);
945         }
946
947         if (p->score)
948                 sprintf(status, "%c%03d", p->status,
949                         (int)(0.5 + p->score * 100.0/MAX_SCORE));
950         else {
951                 status[0] = p->status;
952                 status[1] = 0;
953         }
954         switch (p->status) {
955         case DIFF_STATUS_COPIED:
956         case DIFF_STATUS_RENAMED:
957                 two_paths = 1;
958                 break;
959         case DIFF_STATUS_ADDED:
960         case DIFF_STATUS_DELETED:
961                 two_paths = 0;
962                 break;
963         default:
964                 two_paths = 0;
965                 break;
966         }
967         if (output_format != DIFF_FORMAT_NAME_STATUS) {
968                 printf(":%06o %06o %s ",
969                        p->one->mode, p->two->mode, sha1_to_hex(p->one->sha1));
970                 printf("%s ", sha1_to_hex(p->two->sha1));
971         }
972         printf("%s%c%s", status, inter_name_termination, path_one);
973         if (two_paths)
974                 printf("%c%s", inter_name_termination, path_two);
975         putchar(line_termination);
976         if (path_one != p->one->path)
977                 free((void*)path_one);
978         if (path_two != p->two->path)
979                 free((void*)path_two);
980 }
981
982 static void diff_flush_name(struct diff_filepair *p,
983                             int inter_name_termination,
984                             int line_termination)
985 {
986         char *path = p->two->path;
987
988         if (line_termination)
989                 path = quote_one(p->two->path);
990         else
991                 path = p->two->path;
992         printf("%s%c", path, line_termination);
993         if (p->two->path != path)
994                 free(path);
995 }
996
997 int diff_unmodified_pair(struct diff_filepair *p)
998 {
999         /* This function is written stricter than necessary to support
1000          * the currently implemented transformers, but the idea is to
1001          * let transformers to produce diff_filepairs any way they want,
1002          * and filter and clean them up here before producing the output.
1003          */
1004         struct diff_filespec *one, *two;
1005
1006         if (DIFF_PAIR_UNMERGED(p))
1007                 return 0; /* unmerged is interesting */
1008
1009         one = p->one;
1010         two = p->two;
1011
1012         /* deletion, addition, mode or type change
1013          * and rename are all interesting.
1014          */
1015         if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two) ||
1016             DIFF_PAIR_MODE_CHANGED(p) ||
1017             strcmp(one->path, two->path))
1018                 return 0;
1019
1020         /* both are valid and point at the same path.  that is, we are
1021          * dealing with a change.
1022          */
1023         if (one->sha1_valid && two->sha1_valid &&
1024             !memcmp(one->sha1, two->sha1, sizeof(one->sha1)))
1025                 return 1; /* no change */
1026         if (!one->sha1_valid && !two->sha1_valid)
1027                 return 1; /* both look at the same file on the filesystem. */
1028         return 0;
1029 }
1030
1031 static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o)
1032 {
1033         if (diff_unmodified_pair(p))
1034                 return;
1035
1036         if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
1037             (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
1038                 return; /* no tree diffs in patch format */ 
1039
1040         run_diff(p, o);
1041 }
1042
1043 int diff_queue_is_empty(void)
1044 {
1045         struct diff_queue_struct *q = &diff_queued_diff;
1046         int i;
1047         for (i = 0; i < q->nr; i++)
1048                 if (!diff_unmodified_pair(q->queue[i]))
1049                         return 0;
1050         return 1;
1051 }
1052
1053 #if DIFF_DEBUG
1054 void diff_debug_filespec(struct diff_filespec *s, int x, const char *one)
1055 {
1056         fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n",
1057                 x, one ? one : "",
1058                 s->path,
1059                 DIFF_FILE_VALID(s) ? "valid" : "invalid",
1060                 s->mode,
1061                 s->sha1_valid ? sha1_to_hex(s->sha1) : "");
1062         fprintf(stderr, "queue[%d] %s size %lu flags %d\n",
1063                 x, one ? one : "",
1064                 s->size, s->xfrm_flags);
1065 }
1066
1067 void diff_debug_filepair(const struct diff_filepair *p, int i)
1068 {
1069         diff_debug_filespec(p->one, i, "one");
1070         diff_debug_filespec(p->two, i, "two");
1071         fprintf(stderr, "score %d, status %c stays %d broken %d\n",
1072                 p->score, p->status ? p->status : '?',
1073                 p->source_stays, p->broken_pair);
1074 }
1075
1076 void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
1077 {
1078         int i;
1079         if (msg)
1080                 fprintf(stderr, "%s\n", msg);
1081         fprintf(stderr, "q->nr = %d\n", q->nr);
1082         for (i = 0; i < q->nr; i++) {
1083                 struct diff_filepair *p = q->queue[i];
1084                 diff_debug_filepair(p, i);
1085         }
1086 }
1087 #endif
1088
1089 static void diff_resolve_rename_copy(void)
1090 {
1091         int i, j;
1092         struct diff_filepair *p, *pp;
1093         struct diff_queue_struct *q = &diff_queued_diff;
1094
1095         diff_debug_queue("resolve-rename-copy", q);
1096
1097         for (i = 0; i < q->nr; i++) {
1098                 p = q->queue[i];
1099                 p->status = 0; /* undecided */
1100                 if (DIFF_PAIR_UNMERGED(p))
1101                         p->status = DIFF_STATUS_UNMERGED;
1102                 else if (!DIFF_FILE_VALID(p->one))
1103                         p->status = DIFF_STATUS_ADDED;
1104                 else if (!DIFF_FILE_VALID(p->two))
1105                         p->status = DIFF_STATUS_DELETED;
1106                 else if (DIFF_PAIR_TYPE_CHANGED(p))
1107                         p->status = DIFF_STATUS_TYPE_CHANGED;
1108
1109                 /* from this point on, we are dealing with a pair
1110                  * whose both sides are valid and of the same type, i.e.
1111                  * either in-place edit or rename/copy edit.
1112                  */
1113                 else if (DIFF_PAIR_RENAME(p)) {
1114                         if (p->source_stays) {
1115                                 p->status = DIFF_STATUS_COPIED;
1116                                 continue;
1117                         }
1118                         /* See if there is some other filepair that
1119                          * copies from the same source as us.  If so
1120                          * we are a copy.  Otherwise we are either a
1121                          * copy if the path stays, or a rename if it
1122                          * does not, but we already handled "stays" case.
1123                          */
1124                         for (j = i + 1; j < q->nr; j++) {
1125                                 pp = q->queue[j];
1126                                 if (strcmp(pp->one->path, p->one->path))
1127                                         continue; /* not us */
1128                                 if (!DIFF_PAIR_RENAME(pp))
1129                                         continue; /* not a rename/copy */
1130                                 /* pp is a rename/copy from the same source */
1131                                 p->status = DIFF_STATUS_COPIED;
1132                                 break;
1133                         }
1134                         if (!p->status)
1135                                 p->status = DIFF_STATUS_RENAMED;
1136                 }
1137                 else if (memcmp(p->one->sha1, p->two->sha1, 20) ||
1138                          p->one->mode != p->two->mode)
1139                         p->status = DIFF_STATUS_MODIFIED;
1140                 else {
1141                         /* This is a "no-change" entry and should not
1142                          * happen anymore, but prepare for broken callers.
1143                          */
1144                         error("feeding unmodified %s to diffcore",
1145                               p->one->path);
1146                         p->status = DIFF_STATUS_UNKNOWN;
1147                 }
1148         }
1149         diff_debug_queue("resolve-rename-copy done", q);
1150 }
1151
1152 void diff_flush(struct diff_options *options)
1153 {
1154         struct diff_queue_struct *q = &diff_queued_diff;
1155         int i;
1156         int inter_name_termination = '\t';
1157         int diff_output_format = options->output_format;
1158         int line_termination = options->line_termination;
1159
1160         if (!line_termination)
1161                 inter_name_termination = 0;
1162
1163         for (i = 0; i < q->nr; i++) {
1164                 struct diff_filepair *p = q->queue[i];
1165                 if ((diff_output_format == DIFF_FORMAT_NO_OUTPUT) ||
1166                     (p->status == DIFF_STATUS_UNKNOWN))
1167                         continue;
1168                 if (p->status == 0)
1169                         die("internal error in diff-resolve-rename-copy");
1170                 switch (diff_output_format) {
1171                 case DIFF_FORMAT_PATCH:
1172                         diff_flush_patch(p, options);
1173                         break;
1174                 case DIFF_FORMAT_RAW:
1175                 case DIFF_FORMAT_NAME_STATUS:
1176                         diff_flush_raw(p, line_termination,
1177                                        inter_name_termination,
1178                                        diff_output_format);
1179                         break;
1180                 case DIFF_FORMAT_NAME:
1181                         diff_flush_name(p,
1182                                         inter_name_termination,
1183                                         line_termination);
1184                         break;
1185                 }
1186                 diff_free_filepair(q->queue[i]);
1187         }
1188         free(q->queue);
1189         q->queue = NULL;
1190         q->nr = q->alloc = 0;
1191 }
1192
1193 static void diffcore_apply_filter(const char *filter)
1194 {
1195         int i;
1196         struct diff_queue_struct *q = &diff_queued_diff;
1197         struct diff_queue_struct outq;
1198         outq.queue = NULL;
1199         outq.nr = outq.alloc = 0;
1200
1201         if (!filter)
1202                 return;
1203
1204         if (strchr(filter, DIFF_STATUS_FILTER_AON)) {
1205                 int found;
1206                 for (i = found = 0; !found && i < q->nr; i++) {
1207                         struct diff_filepair *p = q->queue[i];
1208                         if (((p->status == DIFF_STATUS_MODIFIED) &&
1209                              ((p->score &&
1210                                strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
1211                               (!p->score &&
1212                                strchr(filter, DIFF_STATUS_MODIFIED)))) ||
1213                             ((p->status != DIFF_STATUS_MODIFIED) &&
1214                              strchr(filter, p->status)))
1215                                 found++;
1216                 }
1217                 if (found)
1218                         return;
1219
1220                 /* otherwise we will clear the whole queue
1221                  * by copying the empty outq at the end of this
1222                  * function, but first clear the current entries
1223                  * in the queue.
1224                  */
1225                 for (i = 0; i < q->nr; i++)
1226                         diff_free_filepair(q->queue[i]);
1227         }
1228         else {
1229                 /* Only the matching ones */
1230                 for (i = 0; i < q->nr; i++) {
1231                         struct diff_filepair *p = q->queue[i];
1232
1233                         if (((p->status == DIFF_STATUS_MODIFIED) &&
1234                              ((p->score &&
1235                                strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
1236                               (!p->score &&
1237                                strchr(filter, DIFF_STATUS_MODIFIED)))) ||
1238                             ((p->status != DIFF_STATUS_MODIFIED) &&
1239                              strchr(filter, p->status)))
1240                                 diff_q(&outq, p);
1241                         else
1242                                 diff_free_filepair(p);
1243                 }
1244         }
1245         free(q->queue);
1246         *q = outq;
1247 }
1248
1249 void diffcore_std(struct diff_options *options)
1250 {
1251         if (options->paths && options->paths[0])
1252                 diffcore_pathspec(options->paths);
1253         if (options->break_opt != -1)
1254                 diffcore_break(options->break_opt);
1255         if (options->detect_rename)
1256                 diffcore_rename(options);
1257         if (options->break_opt != -1)
1258                 diffcore_merge_broken();
1259         if (options->pickaxe)
1260                 diffcore_pickaxe(options->pickaxe, options->pickaxe_opts);
1261         if (options->orderfile)
1262                 diffcore_order(options->orderfile);
1263         diff_resolve_rename_copy();
1264         diffcore_apply_filter(options->filter);
1265 }
1266
1267
1268 void diffcore_std_no_resolve(struct diff_options *options)
1269 {
1270         if (options->pickaxe)
1271                 diffcore_pickaxe(options->pickaxe, options->pickaxe_opts);
1272         if (options->orderfile)
1273                 diffcore_order(options->orderfile);
1274         diffcore_apply_filter(options->filter);
1275 }
1276
1277 void diff_addremove(struct diff_options *options,
1278                     int addremove, unsigned mode,
1279                     const unsigned char *sha1,
1280                     const char *base, const char *path)
1281 {
1282         char concatpath[PATH_MAX];
1283         struct diff_filespec *one, *two;
1284
1285         /* This may look odd, but it is a preparation for
1286          * feeding "there are unchanged files which should
1287          * not produce diffs, but when you are doing copy
1288          * detection you would need them, so here they are"
1289          * entries to the diff-core.  They will be prefixed
1290          * with something like '=' or '*' (I haven't decided
1291          * which but should not make any difference).
1292          * Feeding the same new and old to diff_change() 
1293          * also has the same effect.
1294          * Before the final output happens, they are pruned after
1295          * merged into rename/copy pairs as appropriate.
1296          */
1297         if (options->reverse_diff)
1298                 addremove = (addremove == '+' ? '-' :
1299                              addremove == '-' ? '+' : addremove);
1300
1301         if (!path) path = "";
1302         sprintf(concatpath, "%s%s", base, path);
1303         one = alloc_filespec(concatpath);
1304         two = alloc_filespec(concatpath);
1305
1306         if (addremove != '+')
1307                 fill_filespec(one, sha1, mode);
1308         if (addremove != '-')
1309                 fill_filespec(two, sha1, mode);
1310
1311         diff_queue(&diff_queued_diff, one, two);
1312 }
1313
1314 void diff_change(struct diff_options *options,
1315                  unsigned old_mode, unsigned new_mode,
1316                  const unsigned char *old_sha1,
1317                  const unsigned char *new_sha1,
1318                  const char *base, const char *path) 
1319 {
1320         char concatpath[PATH_MAX];
1321         struct diff_filespec *one, *two;
1322
1323         if (options->reverse_diff) {
1324                 unsigned tmp;
1325                 const unsigned char *tmp_c;
1326                 tmp = old_mode; old_mode = new_mode; new_mode = tmp;
1327                 tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
1328         }
1329         if (!path) path = "";
1330         sprintf(concatpath, "%s%s", base, path);
1331         one = alloc_filespec(concatpath);
1332         two = alloc_filespec(concatpath);
1333         fill_filespec(one, old_sha1, old_mode);
1334         fill_filespec(two, new_sha1, new_mode);
1335
1336         diff_queue(&diff_queued_diff, one, two);
1337 }
1338
1339 void diff_unmerge(struct diff_options *options,
1340                   const char *path)
1341 {
1342         struct diff_filespec *one, *two;
1343         one = alloc_filespec(path);
1344         two = alloc_filespec(path);
1345         diff_queue(&diff_queued_diff, one, two);
1346 }