combine-diff: add a bit more comments.
[git.git] / combine-diff.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "diffcore.h"
5 #include "quote.h"
6
7 static int uninteresting(struct diff_filepair *p)
8 {
9         if (diff_unmodified_pair(p))
10                 return 1;
11         if (!S_ISREG(p->one->mode) || !S_ISREG(p->two->mode))
12                 return 1;
13         return 0;
14 }
15
16 static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr, int n, int num_parent)
17 {
18         struct diff_queue_struct *q = &diff_queued_diff;
19         struct combine_diff_path *p;
20         int i;
21
22         if (!n) {
23                 struct combine_diff_path *list = NULL, **tail = &list;
24                 for (i = 0; i < q->nr; i++) {
25                         int len;
26                         const char *path;
27                         if (uninteresting(q->queue[i]))
28                                 continue;
29                         path = q->queue[i]->two->path;
30                         len = strlen(path);
31
32                         p = xmalloc(sizeof(*p) + len + 1 + num_parent * 20);
33                         p->path = (char*) &(p->parent_sha1[num_parent][0]);
34                         memcpy(p->path, path, len);
35                         p->path[len] = 0;
36                         p->len = len;
37                         p->next = NULL;
38                         memcpy(p->sha1, q->queue[i]->two->sha1, 20);
39                         memcpy(p->parent_sha1[n], q->queue[i]->one->sha1, 20);
40                         *tail = p;
41                         tail = &p->next;
42                 }
43                 return list;
44         }
45
46         for (p = curr; p; p = p->next) {
47                 int found = 0;
48                 if (!p->len)
49                         continue;
50                 for (i = 0; i < q->nr; i++) {
51                         const char *path;
52                         int len;
53
54                         if (uninteresting(q->queue[i]))
55                                 continue;
56                         path = q->queue[i]->two->path;
57                         len = strlen(path);
58                         if (len == p->len && !memcmp(path, p->path, len)) {
59                                 found = 1;
60                                 memcpy(p->parent_sha1[n],
61                                        q->queue[i]->one->sha1, 20);
62                                 break;
63                         }
64                 }
65                 if (!found)
66                         p->len = 0;
67         }
68         return curr;
69 }
70
71 /* Lines lost from parent */
72 struct lline {
73         struct lline *next;
74         int len;
75         unsigned long parent_map;
76         char line[FLEX_ARRAY];
77 };
78
/* A line that survives in the merge result */
struct sline {
	/* lines lost in front of this line, oldest first */
	struct lline *lost_head;
	/* where the next lost line gets linked in */
	struct lline **lost_tail;
	/* beginning of this line inside the result buffer (not NUL-terminated) */
	char *bol;
	/* length of the line, not counting the newline */
	int len;
	/* Bits 0 up to (N-1) start out on for every line, and bit P is
	 * turned off when the diff against parent P shows this line as
	 * added, i.e. the bit stays on while parent P has this line.
	 * Bit N is used for "interesting" lines, including context.
	 */
	unsigned long flag;
};
90
91 static char *grab_blob(const unsigned char *sha1, unsigned long *size)
92 {
93         char *blob;
94         char type[20];
95         if (!memcmp(sha1, null_sha1, 20)) {
96                 /* deleted blob */
97                 *size = 0;
98                 return xcalloc(1, 1);
99         }
100         blob = read_sha1_file(sha1, type, size);
101         if (strcmp(type, "blob"))
102                 die("object '%s' is not a blob!", sha1_to_hex(sha1));
103         return blob;
104 }
105
/* Size of the buffers that hold a temporary file name */
#define TMPPATHLEN 50
/* Size of the buffer that receives one line of diff output */
#define MAXLINELEN 10240

/*
 * Create a temporary file with git_mkstemp(), handing it TMPFILE as a
 * buffer of TMPPATHLEN bytes, and store the SIZE bytes found at BLOB
 * in the file.
 * Dies if the file cannot be created or the write comes up short.
 */
static void write_to_temp_file(char *tmpfile, void *blob, unsigned long size)
{
	int fd;

	fd = git_mkstemp(tmpfile, TMPPATHLEN, ".diff_XXXXXX");
	if (fd < 0)
		die("unable to create temp-file");
	if (write(fd, blob, size) == size) {
		close(fd);
		return;
	}
	die("unable to write temp-file");
}
118
/*
 * Store the contents of the blob named by SHA1 in a new temporary
 * file created through write_to_temp_file() with TMPFILE as its
 * name buffer.
 */
static void write_temp_blob(char *tmpfile, const unsigned char *sha1)
{
	unsigned long len;
	void *data = grab_blob(sha1, &len);

	write_to_temp_file(tmpfile, data, len);
	free(data);
}
127
/*
 * Parse a run of decimal digits starting at *CP_P.
 * On success, store the value in *NUM_P, advance *CP_P past the
 * digits and return 0.  When there is no digit at *CP_P, return -1
 * and leave both *CP_P and *NUM_P alone.
 */
static int parse_num(char **cp_p, unsigned int *num_p)
{
	char *start = *cp_p;
	char *end = start;
	unsigned int value = 0;

	for (; '0' <= *end && *end <= '9'; end++)
		value = value * 10 + *end - '0';
	if (end == start)
		return -1;
	*cp_p = end;
	*num_p = value;
	return 0;
}
142
/*
 * Parse a unified diff hunk header "@@ -OB[,ON] +NB[,NN] @@" held in
 * LINE; the caller has already checked that it begins with "@@ -".
 * A count that is left out of the header is taken to be 1.
 * Returns 0 on success, -1 when the trailing " @@" is missing, and
 * the result of error() for the other malformed inputs.
 */
static int parse_hunk_header(char *line, int len,
			     unsigned int *ob, unsigned int *on,
			     unsigned int *nb, unsigned int *nn)
{
	/* skip the leading "@@ -" */
	char *cp = line + 4;

	if (parse_num(&cp, ob))
		goto bad_line;
	if (*cp != ',')
		*on = 1;
	else {
		cp++;
		if (parse_num(&cp, on))
			goto bad_line;
	}

	if (*cp++ != ' ' || *cp++ != '+')
		goto bad_line;

	if (parse_num(&cp, nb))
		goto bad_line;
	if (*cp != ',')
		*nn = 1;
	else {
		cp++;
		if (parse_num(&cp, nn))
			goto bad_line;
	}
	return memcmp(cp, " @@", 3) ? -1 : 0;

 bad_line:
	return error("malformed diff output: %s", line);
}
173
174 static void append_lost(struct sline *sline, int n, const char *line)
175 {
176         struct lline *lline;
177         int len = strlen(line);
178         unsigned long this_mask = (1UL<<n);
179         if (line[len-1] == '\n')
180                 len--;
181
182         /* Check to see if we can squash things */
183         if (sline->lost_head) {
184                 struct lline *last_one = NULL;
185                 /* We cannot squash it with earlier one */
186                 for (lline = sline->lost_head;
187                      lline;
188                      lline = lline->next)
189                         if (lline->parent_map & this_mask)
190                                 last_one = lline;
191                 lline = last_one ? last_one->next : sline->lost_head;
192                 while (lline) {
193                         if (lline->len == len &&
194                             !memcmp(lline->line, line, len)) {
195                                 lline->parent_map |= this_mask;
196                                 return;
197                         }
198                         lline = lline->next;
199                 }
200         }
201
202         lline = xmalloc(sizeof(*lline) + len + 1);
203         lline->len = len;
204         lline->next = NULL;
205         lline->parent_map = this_mask;
206         memcpy(lline->line, line, len);
207         lline->line[len] = 0;
208         *sline->lost_tail = lline;
209         sline->lost_tail = &lline->next;
210 }
211
212 static void combine_diff(const unsigned char *parent, const char *ourtmp,
213                          struct sline *sline, int cnt, int n)
214 {
215         FILE *in;
216         char parent_tmp[TMPPATHLEN];
217         char cmd[TMPPATHLEN * 2 + 1024];
218         char line[MAXLINELEN];
219         unsigned int lno, ob, on, nb, nn;
220         unsigned long pmask = ~(1UL << n);
221         struct sline *lost_bucket = NULL;
222
223         write_temp_blob(parent_tmp, parent);
224         sprintf(cmd, "diff --unified=0 -La/x -Lb/x '%s' '%s'",
225                 parent_tmp, ourtmp);
226         in = popen(cmd, "r");
227         if (!in)
228                 return;
229
230         lno = 1;
231         while (fgets(line, sizeof(line), in) != NULL) {
232                 int len = strlen(line);
233                 if (5 < len && !memcmp("@@ -", line, 4)) {
234                         if (parse_hunk_header(line, len,
235                                               &ob, &on, &nb, &nn))
236                                 break;
237                         lno = nb;
238                         if (!nb) {
239                                 /* @@ -1,2 +0,0 @@ to remove the
240                                  * first two lines...
241                                  */
242                                 nb = 1;
243                         }
244                         if (nn == 0)
245                                 lost_bucket = &sline[nb];
246                         else
247                                 lost_bucket = &sline[nb-1];
248                         continue;
249                 }
250                 if (!lost_bucket)
251                         continue; /* not in any hunk yet */
252                 switch (line[0]) {
253                 case '-':
254                         append_lost(lost_bucket, n, line+1);
255                         break;
256                 case '+':
257                         sline[lno-1].flag &= pmask;
258                         lno++;
259                         break;
260                 }
261         }
262         fclose(in);
263         unlink(parent_tmp);
264 }
265
266 static unsigned long context = 3;
267 static char combine_marker = '@';
268
269 static int interesting(struct sline *sline, unsigned long all_mask)
270 {
271         return ((sline->flag & all_mask) != all_mask || sline->lost_head);
272 }
273
274 static unsigned long adjust_hunk_tail(struct sline *sline,
275                                       unsigned long all_mask,
276                                       unsigned long hunk_begin,
277                                       unsigned long i)
278 {
279         /* i points at the first uninteresting line.
280          * If the last line of the hunk was interesting
281          * only because it has some deletion, then
282          * it is not all that interesting for the
283          * purpose of giving trailing context lines.
284          */
285         if ((hunk_begin + 1 <= i) &&
286             ((sline[i-1].flag & all_mask) == all_mask))
287                 i--;
288         return i;
289 }
290
291 static unsigned long next_interesting(struct sline *sline,
292                                       unsigned long mark,
293                                       unsigned long i,
294                                       unsigned long cnt,
295                                       int uninteresting)
296 {
297         while (i < cnt)
298                 if (uninteresting ?
299                     !(sline[i].flag & mark) :
300                     (sline[i].flag & mark))
301                         return i;
302                 else
303                         i++;
304         return cnt;
305 }
306
307 static int give_context(struct sline *sline, unsigned long cnt, int num_parent)
308 {
309         unsigned long all_mask = (1UL<<num_parent) - 1;
310         unsigned long mark = (1UL<<num_parent);
311         unsigned long i;
312
313         i = next_interesting(sline, mark, 0, cnt, 0);
314         if (cnt <= i)
315                 return 0;
316
317         while (i < cnt) {
318                 unsigned long j = (context < i) ? (i - context) : 0;
319                 unsigned long k;
320                 while (j < i)
321                         sline[j++].flag |= mark;
322
323         again:
324                 j = next_interesting(sline, mark, i, cnt, 1);
325                 if (cnt <= j)
326                         break; /* the rest are all interesting */
327
328                 /* lookahead context lines */
329                 k = next_interesting(sline, mark, j, cnt, 0);
330                 j = adjust_hunk_tail(sline, all_mask, i, j);
331
332                 if (k < j + context) {
333                         /* k is interesting and [j,k) are not, but
334                          * paint them interesting because the gap is small.
335                          */
336                         while (j < k)
337                                 sline[j++].flag |= mark;
338                         i = k;
339                         goto again;
340                 }
341
342                 /* j is the first uninteresting line and there is
343                  * no overlap beyond it within context lines.
344                  */
345                 i = k;
346                 k = (j + context < cnt) ? j + context : cnt;
347                 while (j < k)
348                         sline[j++].flag |= mark;
349         }
350         return 1;
351 }
352
353 static int make_hunks(struct sline *sline, unsigned long cnt,
354                        int num_parent, int dense)
355 {
356         unsigned long all_mask = (1UL<<num_parent) - 1;
357         unsigned long mark = (1UL<<num_parent);
358         unsigned long i;
359         int has_interesting = 0;
360
361         for (i = 0; i < cnt; i++) {
362                 if (interesting(&sline[i], all_mask))
363                         sline[i].flag |= mark;
364                 else
365                         sline[i].flag &= ~mark;
366         }
367         if (!dense)
368                 return give_context(sline, cnt, num_parent);
369
370         /* Look at each hunk, and if we have changes from only one
371          * parent, or the changes are the same from all but one
372          * parent, mark that uninteresting.
373          */
374         i = 0;
375         while (i < cnt) {
376                 unsigned long j, hunk_begin, hunk_end;
377                 unsigned long same_diff;
378                 while (i < cnt && !(sline[i].flag & mark))
379                         i++;
380                 if (cnt <= i)
381                         break; /* No more interesting hunks */
382                 hunk_begin = i;
383                 for (j = i + 1; j < cnt; j++) {
384                         if (!(sline[j].flag & mark)) {
385                                 /* Look beyond the end to see if there
386                                  * is an interesting line after this
387                                  * hunk within context span.
388                                  */
389                                 unsigned long la; /* lookahead */
390                                 int contin = 0;
391                                 la = adjust_hunk_tail(sline, all_mask,
392                                                      hunk_begin, j);
393                                 la = (la + context < cnt) ?
394                                         (la + context) : cnt;
395                                 while (j <= --la) {
396                                         if (sline[la].flag & mark) {
397                                                 contin = 1;
398                                                 break;
399                                         }
400                                 }
401                                 if (!contin)
402                                         break;
403                                 j = la;
404                         }
405                 }
406                 hunk_end = j;
407
408                 /* [i..hunk_end) are interesting.  Now is it really
409                  * interesting?  We check if there are only two versions
410                  * and the result matches one of them.  That is, we look
411                  * at:
412                  *   (+) line, which records lines added to which parents;
413                  *       this line appears in the result.
414                  *   (-) line, which records from what parents the line
415                  *       was removed; this line does not appear in the result.
416                  * then check the set of parents the result has difference
417                  * from, from all lines.  If there are lines that has
418                  * different set of parents that the result has differences
419                  * from, that means we have more than two versions.
420                  *
421                  * Even when we have only two versions, if the result does
422                  * not match any of the parents, the it should be considered
423                  * interesting.  In such a case, we would have all '+' line.
424                  * After passing the above "two versions" test, that would
425                  * appear as "the same set of parents" to be "all parents".
426                  */
427                 same_diff = 0;
428                 has_interesting = 0;
429                 for (j = i; j < hunk_end && !has_interesting; j++) {
430                         unsigned long this_diff = ~sline[j].flag & all_mask;
431                         struct lline *ll = sline[j].lost_head;
432                         if (this_diff) {
433                                 /* This has some changes.  Is it the
434                                  * same as others?
435                                  */
436                                 if (!same_diff)
437                                         same_diff = this_diff;
438                                 else if (same_diff != this_diff) {
439                                         has_interesting = 1;
440                                         break;
441                                 }
442                         }
443                         while (ll && !has_interesting) {
444                                 /* Lost this line from these parents;
445                                  * who are they?  Are they the same?
446                                  */
447                                 this_diff = ll->parent_map;
448                                 if (!same_diff)
449                                         same_diff = this_diff;
450                                 else if (same_diff != this_diff) {
451                                         has_interesting = 1;
452                                 }
453                                 ll = ll->next;
454                         }
455                 }
456
457                 if (!has_interesting && same_diff != all_mask) {
458                         /* This hunk is not that interesting after all */
459                         for (j = hunk_begin; j < hunk_end; j++)
460                                 sline[j].flag &= ~mark;
461                 }
462                 i = hunk_end;
463         }
464
465         has_interesting = give_context(sline, cnt, num_parent);
466         return has_interesting;
467 }
468
469 static void dump_sline(struct sline *sline, int cnt, int num_parent)
470 {
471         unsigned long mark = (1UL<<num_parent);
472         int i;
473         int lno = 0;
474
475         while (1) {
476                 struct sline *sl = &sline[lno];
477                 int hunk_end;
478                 while (lno < cnt && !(sline[lno].flag & mark))
479                         lno++;
480                 if (cnt <= lno)
481                         break;
482                 for (hunk_end = lno + 1; hunk_end < cnt; hunk_end++)
483                         if (!(sline[hunk_end].flag & mark))
484                                 break;
485                 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
486                 printf(" +%d,%d ", lno+1, hunk_end-lno);
487                 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
488                 putchar('\n');
489                 while (lno < hunk_end) {
490                         struct lline *ll;
491                         int j;
492                         sl = &sline[lno++];
493                         ll = sl->lost_head;
494                         while (ll) {
495                                 for (j = 0; j < num_parent; j++) {
496                                         if (ll->parent_map & (1UL<<j))
497                                                 putchar('-');
498                                         else
499                                                 putchar(' ');
500                                 }
501                                 puts(ll->line);
502                                 ll = ll->next;
503                         }
504                         for (j = 0; j < num_parent; j++) {
505                                 if ((1UL<<j) & sl->flag)
506                                         putchar(' ');
507                                 else
508                                         putchar('+');
509                         }
510                         printf("%.*s\n", sl->len, sl->bol);
511                 }
512         }
513 }
514
515 static void reuse_combine_diff(struct sline *sline, unsigned long cnt,
516                                int i, int j)
517 {
518         /* We have already examined parent j and we know parent i
519          * and parent j are the same, so reuse the combined result
520          * of parent j for parent i.
521          */
522         unsigned long lno, imask, jmask;
523         imask = (1UL<<i);
524         jmask = (1UL<<j);
525
526         for (lno = 0; lno < cnt; lno++) {
527                 struct lline *ll = sline->lost_head;
528                 while (ll) {
529                         if (ll->parent_map & jmask)
530                                 ll->parent_map |= imask;
531                         ll = ll->next;
532                 }
533                 if (!(sline->flag & jmask))
534                         sline->flag &= ~imask;
535                 sline++;
536         }
537 }
538
539 int show_combined_diff(struct combine_diff_path *elem, int num_parent,
540                        int dense, const char *header, int show_empty)
541 {
542         unsigned long size, cnt, lno;
543         char *result, *cp, *ep;
544         struct sline *sline; /* survived lines */
545         int i, show_hunks, shown_header = 0;
546         char ourtmp_buf[TMPPATHLEN];
547         char *ourtmp = ourtmp_buf;
548
549         /* Read the result of merge first */
550         if (memcmp(elem->sha1, null_sha1, 20)) {
551                 result = grab_blob(elem->sha1, &size);
552                 write_to_temp_file(ourtmp, result, size);
553         }
554         else {
555                 struct stat st;
556                 int fd;
557                 ourtmp = elem->path;
558                 if (0 <= (fd = open(ourtmp, O_RDONLY)) &&
559                     !fstat(fd, &st)) {
560                         int len = st.st_size;
561                         int cnt = 0;
562
563                         size = len;
564                         result = xmalloc(len + 1);
565                         while (cnt < len) {
566                                 int done = xread(fd, result+cnt, len-cnt);
567                                 if (done == 0)
568                                         break;
569                                 if (done < 0)
570                                         die("read error '%s'", ourtmp);
571                                 cnt += done;
572                         }
573                         result[len] = 0;
574                 }
575                 else {
576                         /* deleted file */
577                         size = 0;
578                         result = xmalloc(1);
579                         result[0] = 0;
580                         ourtmp = "/dev/null";
581                 }
582                 if (0 <= fd)
583                         close(fd);
584         }
585
586         for (cnt = 0, cp = result; cp - result < size; cp++) {
587                 if (*cp == '\n')
588                         cnt++;
589         }
590         if (result[size-1] != '\n')
591                 cnt++; /* incomplete line */
592
593         sline = xcalloc(cnt, sizeof(*sline));
594         ep = result;
595         sline[0].bol = result;
596         for (lno = 0, cp = result; cp - result < size; cp++) {
597                 if (*cp == '\n') {
598                         sline[lno].lost_tail = &sline[lno].lost_head;
599                         sline[lno].len = cp - sline[lno].bol;
600                         sline[lno].flag = (1UL<<num_parent) - 1;
601                         lno++;
602                         if (lno < cnt)
603                                 sline[lno].bol = cp + 1;
604                 }
605         }
606         if (result[size-1] != '\n') {
607                 sline[cnt-1].lost_tail = &sline[cnt-1].lost_head;
608                 sline[cnt-1].len = size - (sline[cnt-1].bol - result);
609                 sline[cnt-1].flag = (1UL<<num_parent) - 1;
610         }
611
612         for (i = 0; i < num_parent; i++) {
613                 int j;
614                 for (j = 0; j < i; j++) {
615                         if (!memcmp(elem->parent_sha1[i],
616                                     elem->parent_sha1[j], 20)) {
617                                 reuse_combine_diff(sline, cnt, i, j);
618                                 break;
619                         }
620                 }
621                 if (i <= j)
622                         combine_diff(elem->parent_sha1[i], ourtmp, sline,
623                                      cnt, i);
624         }
625
626         show_hunks = make_hunks(sline, cnt, num_parent, dense);
627
628         if (header && (show_hunks || show_empty)) {
629                 shown_header++;
630                 puts(header);
631         }
632         if (show_hunks) {
633                 printf("diff --%s ", dense ? "cc" : "combined");
634                 if (quote_c_style(elem->path, NULL, NULL, 0))
635                         quote_c_style(elem->path, NULL, stdout, 0);
636                 else
637                         printf("%s", elem->path);
638                 putchar('\n');
639                 printf("index ");
640                 for (i = 0; i < num_parent; i++) {
641                         printf("%s%s",
642                                i ? ".." : "",
643                                find_unique_abbrev(elem->parent_sha1[i],
644                                                   DEFAULT_ABBREV));
645                 }
646                 printf("->%s\n",
647                        find_unique_abbrev(elem->sha1, DEFAULT_ABBREV));
648                 dump_sline(sline, cnt, num_parent);
649         }
650         if (ourtmp == ourtmp_buf)
651                 unlink(ourtmp);
652         free(result);
653
654         for (i = 0; i < cnt; i++) {
655                 if (sline[i].lost_head) {
656                         struct lline *ll = sline[i].lost_head;
657                         while (ll) {
658                                 struct lline *tmp = ll;
659                                 ll = ll->next;
660                                 free(tmp);
661                         }
662                 }
663         }
664         free(sline);
665         return shown_header;
666 }
667
668 int diff_tree_combined_merge(const unsigned char *sha1,
669                              const char *header,
670                              int show_empty_merge, int dense)
671 {
672         struct commit *commit = lookup_commit(sha1);
673         struct diff_options diffopts;
674         struct commit_list *parents;
675         struct combine_diff_path *p, *paths = NULL;
676         int num_parent, i, num_paths;
677
678         diff_setup(&diffopts);
679         diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
680         diffopts.recursive = 1;
681
682         /* count parents */
683         for (parents = commit->parents, num_parent = 0;
684              parents;
685              parents = parents->next, num_parent++)
686                 ; /* nothing */
687
688         /* find set of paths that everybody touches */
689         for (parents = commit->parents, i = 0;
690              parents;
691              parents = parents->next, i++) {
692                 struct commit *parent = parents->item;
693                 diff_tree_sha1(parent->object.sha1, commit->object.sha1, "",
694                                &diffopts);
695                 paths = intersect_paths(paths, i, num_parent);
696                 diff_flush(&diffopts);
697         }
698
699         /* find out surviving paths */
700         for (num_paths = 0, p = paths; p; p = p->next) {
701                 if (p->len)
702                         num_paths++;
703         }
704         if (num_paths || show_empty_merge) {
705                 for (p = paths; p; p = p->next) {
706                         if (!p->len)
707                                 continue;
708                         if (show_combined_diff(p, num_parent, dense, header,
709                                                show_empty_merge))
710                                 header = NULL;
711                 }
712         }
713
714         /* Clean things up */
715         while (paths) {
716                 struct combine_diff_path *tmp = paths;
717                 paths = paths->next;
718                 free(tmp);
719         }
720         return 0;
721 }