combine-diff: fix placement of deletion.
[git.git] / combine-diff.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "diffcore.h"
5 #include "quote.h"
6
7 static int uninteresting(struct diff_filepair *p)
8 {
9         if (diff_unmodified_pair(p))
10                 return 1;
11         if (!S_ISREG(p->one->mode) || !S_ISREG(p->two->mode))
12                 return 1;
13         return 0;
14 }
15
16 static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr, int n, int num_parent)
17 {
18         struct diff_queue_struct *q = &diff_queued_diff;
19         struct combine_diff_path *p;
20         int i;
21
22         if (!n) {
23                 struct combine_diff_path *list = NULL, **tail = &list;
24                 for (i = 0; i < q->nr; i++) {
25                         int len;
26                         const char *path;
27                         if (uninteresting(q->queue[i]))
28                                 continue;
29                         path = q->queue[i]->two->path;
30                         len = strlen(path);
31
32                         p = xmalloc(sizeof(*p) + len + 1 + num_parent * 20);
33                         p->path = (char*) &(p->parent_sha1[num_parent][0]);
34                         memcpy(p->path, path, len);
35                         p->path[len] = 0;
36                         p->len = len;
37                         p->next = NULL;
38                         memcpy(p->sha1, q->queue[i]->two->sha1, 20);
39                         memcpy(p->parent_sha1[n], q->queue[i]->one->sha1, 20);
40                         *tail = p;
41                         tail = &p->next;
42                 }
43                 return list;
44         }
45
46         for (p = curr; p; p = p->next) {
47                 int found = 0;
48                 if (!p->len)
49                         continue;
50                 for (i = 0; i < q->nr; i++) {
51                         const char *path;
52                         int len;
53
54                         if (uninteresting(q->queue[i]))
55                                 continue;
56                         path = q->queue[i]->two->path;
57                         len = strlen(path);
58                         if (len == p->len && !memcmp(path, p->path, len)) {
59                                 found = 1;
60                                 memcpy(p->parent_sha1[n],
61                                        q->queue[i]->one->sha1, 20);
62                                 break;
63                         }
64                 }
65                 if (!found)
66                         p->len = 0;
67         }
68         return curr;
69 }
70
71 struct lline {
72         struct lline *next;
73         int len;
74         unsigned long parent_map;
75         char line[FLEX_ARRAY];
76 };
77
/*
 * A line of the merge result ("surviving line"), together with the
 * lines that were lost from the parents just before it.
 */
struct sline {
        struct lline *lost_head;   /* first lost line attached to this line */
        struct lline **lost_tail;  /* where the next lost line gets linked */
        char *bol;                 /* beginning of this line in the result buffer */
        int len;                   /* length of the line, newline excluded */
        unsigned long flag;        /* per-parent bits plus a hunk mark bit */
};
84
85 static char *grab_blob(const unsigned char *sha1, unsigned long *size)
86 {
87         char *blob;
88         char type[20];
89         if (!memcmp(sha1, null_sha1, 20)) {
90                 /* deleted blob */
91                 *size = 0;
92                 return xcalloc(1, 1);
93         }
94         blob = read_sha1_file(sha1, type, size);
95         if (strcmp(type, "blob"))
96                 die("object '%s' is not a blob!", sha1_to_hex(sha1));
97         return blob;
98 }
99
/* Size of the buffers that receive a temporary file name. */
#define TMPPATHLEN 50
/* Size of the buffer used to read the external diff output line by line. */
#define MAXLINELEN 10240
102
103 static void write_to_temp_file(char *tmpfile, void *blob, unsigned long size)
104 {
105         int fd = git_mkstemp(tmpfile, TMPPATHLEN, ".diff_XXXXXX");
106         if (fd < 0)
107                 die("unable to create temp-file");
108         if (write(fd, blob, size) != size)
109                 die("unable to write temp-file");
110         close(fd);
111 }
112
/*
 * Write the contents of the blob named by `sha1` to a new temporary
 * file whose name is returned in `tmpfile` (TMPPATHLEN bytes).
 */
static void write_temp_blob(char *tmpfile, const unsigned char *sha1)
{
        unsigned long blob_size;
        void *contents = grab_blob(sha1, &blob_size);

        write_to_temp_file(tmpfile, contents, blob_size);
        free(contents);
}
121
/*
 * Parse a run of decimal digits starting at *cp_p.
 *
 * On success, stores the value in *num_p, advances *cp_p past the
 * digits and returns 0.  When there is no digit at *cp_p, returns -1
 * and leaves both *cp_p and *num_p untouched.  The value is accumulated
 * in unsigned arithmetic without any overflow check.
 */
static int parse_num(char **cp_p, unsigned int *num_p)
{
        char *start = *cp_p;
        char *end;
        unsigned int value = 0;

        for (end = start; '0' <= *end && *end <= '9'; end++)
                value = value * 10 + *end - '0';

        if (end == start)
                return -1;

        *cp_p = end;
        *num_p = value;
        return 0;
}
136
/*
 * Parse a unified diff hunk header of the form
 *
 *     @@ -OB[,ON] +NB[,NN] @@
 *
 * `line` must begin with the four bytes "@@ -"; parsing starts right
 * after them.  A missing ",ON" or ",NN" count is taken to be 1.  `len`
 * is accepted but not used.
 *
 * Returns 0 on success and -1 when the closing " @@" is missing.  Any
 * other syntax problem is reported through error() and its return
 * value is passed back.
 */
static int parse_hunk_header(char *line, int len,
                             unsigned int *ob, unsigned int *on,
                             unsigned int *nb, unsigned int *nn)
{
        char *cp = line + 4;

        /* Old side: begin[,count] */
        if (parse_num(&cp, ob))
                goto malformed;
        if (*cp != ',')
                *on = 1;
        else {
                cp++;
                if (parse_num(&cp, on))
                        goto malformed;
        }

        if (*cp++ != ' ' || *cp++ != '+')
                goto malformed;

        /* New side: begin[,count] */
        if (parse_num(&cp, nb))
                goto malformed;
        if (*cp != ',')
                *nn = 1;
        else {
                cp++;
                if (parse_num(&cp, nn))
                        goto malformed;
        }

        return memcmp(cp, " @@", 3) ? -1 : 0;

malformed:
        return error("malformed diff output: %s", line);
}
167
168 static void append_lost(struct sline *sline, int n, const char *line)
169 {
170         struct lline *lline;
171         int len = strlen(line);
172         unsigned long this_mask = (1UL<<n);
173         if (line[len-1] == '\n')
174                 len--;
175
176         /* Check to see if we can squash things */
177         if (sline->lost_head) {
178                 struct lline *last_one = NULL;
179                 /* We cannot squash it with earlier one */
180                 for (lline = sline->lost_head;
181                      lline;
182                      lline = lline->next)
183                         if (lline->parent_map & this_mask)
184                                 last_one = lline;
185                 lline = last_one ? last_one->next : sline->lost_head;
186                 while (lline) {
187                         if (lline->len == len &&
188                             !memcmp(lline->line, line, len)) {
189                                 lline->parent_map |= this_mask;
190                                 return;
191                         }
192                         lline = lline->next;
193                 }
194         }
195
196         lline = xmalloc(sizeof(*lline) + len + 1);
197         lline->len = len;
198         lline->next = NULL;
199         lline->parent_map = this_mask;
200         memcpy(lline->line, line, len);
201         lline->line[len] = 0;
202         *sline->lost_tail = lline;
203         sline->lost_tail = &lline->next;
204 }
205
206 static void combine_diff(const unsigned char *parent, const char *ourtmp,
207                          struct sline *sline, int cnt, int n)
208 {
209         FILE *in;
210         char parent_tmp[TMPPATHLEN];
211         char cmd[TMPPATHLEN * 2 + 1024];
212         char line[MAXLINELEN];
213         unsigned int lno, ob, on, nb, nn;
214         unsigned long pmask = ~(1UL << n);
215         struct sline *lost_bucket = NULL;
216
217         write_temp_blob(parent_tmp, parent);
218         sprintf(cmd, "diff --unified=0 -La/x -Lb/x '%s' '%s'",
219                 parent_tmp, ourtmp);
220         in = popen(cmd, "r");
221         if (!in)
222                 return;
223
224         lno = 1;
225         while (fgets(line, sizeof(line), in) != NULL) {
226                 int len = strlen(line);
227                 if (5 < len && !memcmp("@@ -", line, 4)) {
228                         if (parse_hunk_header(line, len,
229                                               &ob, &on, &nb, &nn))
230                                 break;
231                         lno = nb;
232                         if (!nb) {
233                                 /* @@ -1,2 +0,0 @@ to remove the
234                                  * first two lines...
235                                  */
236                                 nb = 1;
237                         }
238                         if (nn == 0)
239                                 lost_bucket = &sline[nb];
240                         else
241                                 lost_bucket = &sline[nb-1];
242                         continue;
243                 }
244                 if (!lost_bucket)
245                         continue;
246                 switch (line[0]) {
247                 case '-':
248                         append_lost(lost_bucket, n, line+1);
249                         break;
250                 case '+':
251                         sline[lno-1].flag &= pmask;
252                         lno++;
253                         break;
254                 }
255         }
256         fclose(in);
257         unlink(parent_tmp);
258 }
259
/* Number of context lines painted around each interesting hunk. */
static unsigned long context = 3;
/* Character repeated to build the marker around a hunk header. */
static char combine_marker = '@';
262
263 static int interesting(struct sline *sline, unsigned long all_mask)
264 {
265         return ((sline->flag & all_mask) != all_mask || sline->lost_head);
266 }
267
268 static unsigned long adjust_hunk_tail(struct sline *sline,
269                                       unsigned long all_mask,
270                                       unsigned long hunk_begin,
271                                       unsigned long i)
272 {
273         /* i points at the first uninteresting line.
274          * If the last line of the hunk was interesting
275          * only because it has some deletion, then
276          * it is not all that interesting for the
277          * purpose of giving trailing context lines.
278          */
279         if ((hunk_begin + 1 <= i) &&
280             ((sline[i-1].flag & all_mask) == all_mask))
281                 i--;
282         return i;
283 }
284
285 static unsigned long next_interesting(struct sline *sline,
286                                       unsigned long mark,
287                                       unsigned long i,
288                                       unsigned long cnt,
289                                       int uninteresting)
290 {
291         while (i < cnt)
292                 if (uninteresting ?
293                     !(sline[i].flag & mark) :
294                     (sline[i].flag & mark))
295                         return i;
296                 else
297                         i++;
298         return cnt;
299 }
300
301 static int give_context(struct sline *sline, unsigned long cnt, int num_parent)
302 {
303         unsigned long all_mask = (1UL<<num_parent) - 1;
304         unsigned long mark = (1UL<<num_parent);
305         unsigned long i;
306
307         i = next_interesting(sline, mark, 0, cnt, 0);
308         if (cnt <= i)
309                 return 0;
310
311         while (i < cnt) {
312                 unsigned long j = (context < i) ? (i - context) : 0;
313                 unsigned long k;
314                 while (j < i)
315                         sline[j++].flag |= mark;
316
317         again:
318                 j = next_interesting(sline, mark, i, cnt, 1);
319                 if (cnt <= j)
320                         break; /* the rest are all interesting */
321
322                 /* lookahead context lines */
323                 k = next_interesting(sline, mark, j, cnt, 0);
324                 j = adjust_hunk_tail(sline, all_mask, i, j);
325
326                 if (k < j + context) {
327                         /* k is interesting and [j,k) are not, but
328                          * paint them interesting because the gap is small.
329                          */
330                         while (j < k)
331                                 sline[j++].flag |= mark;
332                         i = k;
333                         goto again;
334                 }
335
336                 /* j is the first uninteresting line and there is
337                  * no overlap beyond it within context lines.
338                  */
339                 i = k;
340                 k = (j + context < cnt) ? j + context : cnt;
341                 while (j < k)
342                         sline[j++].flag |= mark;
343         }
344         return 1;
345 }
346
347 static int make_hunks(struct sline *sline, unsigned long cnt,
348                        int num_parent, int dense)
349 {
350         unsigned long all_mask = (1UL<<num_parent) - 1;
351         unsigned long mark = (1UL<<num_parent);
352         unsigned long i;
353         int has_interesting = 0;
354
355         for (i = 0; i < cnt; i++) {
356                 if (interesting(&sline[i], all_mask))
357                         sline[i].flag |= mark;
358                 else
359                         sline[i].flag &= ~mark;
360         }
361         if (!dense)
362                 return give_context(sline, cnt, num_parent);
363
364         /* Look at each hunk, and if we have changes from only one
365          * parent, or the changes are the same from all but one
366          * parent, mark that uninteresting.
367          */
368         i = 0;
369         while (i < cnt) {
370                 unsigned long j, hunk_begin, hunk_end;
371                 unsigned long same_diff;
372                 while (i < cnt && !(sline[i].flag & mark))
373                         i++;
374                 if (cnt <= i)
375                         break; /* No more interesting hunks */
376                 hunk_begin = i;
377                 for (j = i + 1; j < cnt; j++) {
378                         if (!(sline[j].flag & mark)) {
379                                 /* Look beyond the end to see if there
380                                  * is an interesting line after this
381                                  * hunk within context span.
382                                  */
383                                 unsigned long la; /* lookahead */
384                                 int contin = 0;
385                                 la = adjust_hunk_tail(sline, all_mask,
386                                                      hunk_begin, j);
387                                 la = (la + context < cnt) ?
388                                         (la + context) : cnt;
389                                 while (j <= --la) {
390                                         if (sline[la].flag & mark) {
391                                                 contin = 1;
392                                                 break;
393                                         }
394                                 }
395                                 if (!contin)
396                                         break;
397                                 j = la;
398                         }
399                 }
400                 hunk_end = j;
401
402                 /* [i..hunk_end) are interesting.  Now is it really
403                  * interesting?  We check if there are only two versions
404                  * and the result matches one of them.  That is, we look
405                  * at:
406                  *   (+) line, which records lines added to which parents;
407                  *       this line appears in the result.
408                  *   (-) line, which records from what parents the line
409                  *       was removed; this line does not appear in the result.
410                  * then check the set of parents the result has difference
411                  * from, from all lines.  If there are lines that has
412                  * different set of parents that the result has differences
413                  * from, that means we have more than two versions.
414                  *
415                  * Even when we have only two versions, if the result does
416                  * not match any of the parents, the it should be considered
417                  * interesting.  In such a case, we would have all '+' line.
418                  * After passing the above "two versions" test, that would
419                  * appear as "the same set of parents" to be "all parents".
420                  */
421                 same_diff = 0;
422                 has_interesting = 0;
423                 for (j = i; j < hunk_end && !has_interesting; j++) {
424                         unsigned long this_diff = ~sline[j].flag & all_mask;
425                         struct lline *ll = sline[j].lost_head;
426                         if (this_diff) {
427                                 /* This has some changes.  Is it the
428                                  * same as others?
429                                  */
430                                 if (!same_diff)
431                                         same_diff = this_diff;
432                                 else if (same_diff != this_diff) {
433                                         has_interesting = 1;
434                                         break;
435                                 }
436                         }
437                         while (ll && !has_interesting) {
438                                 /* Lost this line from these parents;
439                                  * who are they?  Are they the same?
440                                  */
441                                 this_diff = ll->parent_map;
442                                 if (!same_diff)
443                                         same_diff = this_diff;
444                                 else if (same_diff != this_diff) {
445                                         has_interesting = 1;
446                                 }
447                                 ll = ll->next;
448                         }
449                 }
450
451                 if (!has_interesting && same_diff != all_mask) {
452                         /* This hunk is not that interesting after all */
453                         for (j = hunk_begin; j < hunk_end; j++)
454                                 sline[j].flag &= ~mark;
455                 }
456                 i = hunk_end;
457         }
458
459         has_interesting = give_context(sline, cnt, num_parent);
460         return has_interesting;
461 }
462
463 static void dump_sline(struct sline *sline, int cnt, int num_parent)
464 {
465         unsigned long mark = (1UL<<num_parent);
466         int i;
467         int lno = 0;
468
469         while (1) {
470                 struct sline *sl = &sline[lno];
471                 int hunk_end;
472                 while (lno < cnt && !(sline[lno].flag & mark))
473                         lno++;
474                 if (cnt <= lno)
475                         break;
476                 for (hunk_end = lno + 1; hunk_end < cnt; hunk_end++)
477                         if (!(sline[hunk_end].flag & mark))
478                                 break;
479                 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
480                 printf(" +%d,%d ", lno+1, hunk_end-lno);
481                 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
482                 putchar('\n');
483                 while (lno < hunk_end) {
484                         struct lline *ll;
485                         int j;
486                         sl = &sline[lno++];
487                         ll = sl->lost_head;
488                         while (ll) {
489                                 for (j = 0; j < num_parent; j++) {
490                                         if (ll->parent_map & (1UL<<j))
491                                                 putchar('-');
492                                         else
493                                                 putchar(' ');
494                                 }
495                                 puts(ll->line);
496                                 ll = ll->next;
497                         }
498                         for (j = 0; j < num_parent; j++) {
499                                 if ((1UL<<j) & sl->flag)
500                                         putchar(' ');
501                                 else
502                                         putchar('+');
503                         }
504                         printf("%.*s\n", sl->len, sl->bol);
505                 }
506         }
507 }
508
509 static void reuse_combine_diff(struct sline *sline, unsigned long cnt,
510                                int i, int j)
511 {
512         /* We have already examined parent j and we know parent i
513          * and parent j are the same, so reuse the combined result
514          * of parent j for parent i.
515          */
516         unsigned long lno, imask, jmask;
517         imask = (1UL<<i);
518         jmask = (1UL<<j);
519
520         for (lno = 0; lno < cnt; lno++) {
521                 struct lline *ll = sline->lost_head;
522                 while (ll) {
523                         if (ll->parent_map & jmask)
524                                 ll->parent_map |= imask;
525                         ll = ll->next;
526                 }
527                 if (!(sline->flag & jmask))
528                         sline->flag &= ~imask;
529                 sline++;
530         }
531 }
532
533 int show_combined_diff(struct combine_diff_path *elem, int num_parent,
534                        int dense, const char *header, int show_empty)
535 {
536         unsigned long size, cnt, lno;
537         char *result, *cp, *ep;
538         struct sline *sline; /* survived lines */
539         int i, show_hunks, shown_header = 0;
540         char ourtmp_buf[TMPPATHLEN];
541         char *ourtmp = ourtmp_buf;
542
543         /* Read the result of merge first */
544         if (memcmp(elem->sha1, null_sha1, 20)) {
545                 result = grab_blob(elem->sha1, &size);
546                 write_to_temp_file(ourtmp, result, size);
547         }
548         else {
549                 struct stat st;
550                 int fd;
551                 ourtmp = elem->path;
552                 if (0 <= (fd = open(ourtmp, O_RDONLY)) &&
553                     !fstat(fd, &st)) {
554                         int len = st.st_size;
555                         int cnt = 0;
556
557                         size = len;
558                         result = xmalloc(len + 1);
559                         while (cnt < len) {
560                                 int done = xread(fd, result+cnt, len-cnt);
561                                 if (done == 0)
562                                         break;
563                                 if (done < 0)
564                                         die("read error '%s'", ourtmp);
565                                 cnt += done;
566                         }
567                         result[len] = 0;
568                 }
569                 else {
570                         /* deleted file */
571                         size = 0;
572                         result = xmalloc(1);
573                         result[0] = 0;
574                         ourtmp = "/dev/null";
575                 }
576                 if (0 <= fd)
577                         close(fd);
578         }
579
580         for (cnt = 0, cp = result; cp - result < size; cp++) {
581                 if (*cp == '\n')
582                         cnt++;
583         }
584         if (result[size-1] != '\n')
585                 cnt++; /* incomplete line */
586
587         sline = xcalloc(cnt, sizeof(*sline));
588         ep = result;
589         sline[0].bol = result;
590         for (lno = 0, cp = result; cp - result < size; cp++) {
591                 if (*cp == '\n') {
592                         sline[lno].lost_tail = &sline[lno].lost_head;
593                         sline[lno].len = cp - sline[lno].bol;
594                         sline[lno].flag = (1UL<<num_parent) - 1;
595                         lno++;
596                         if (lno < cnt)
597                                 sline[lno].bol = cp + 1;
598                 }
599         }
600         if (result[size-1] != '\n') {
601                 sline[cnt-1].lost_tail = &sline[cnt-1].lost_head;
602                 sline[cnt-1].len = size - (sline[cnt-1].bol - result);
603                 sline[cnt-1].flag = (1UL<<num_parent) - 1;
604         }
605
606         for (i = 0; i < num_parent; i++) {
607                 int j;
608                 for (j = 0; j < i; j++) {
609                         if (!memcmp(elem->parent_sha1[i],
610                                     elem->parent_sha1[j], 20)) {
611                                 reuse_combine_diff(sline, cnt, i, j);
612                                 break;
613                         }
614                 }
615                 if (i <= j)
616                         combine_diff(elem->parent_sha1[i], ourtmp, sline,
617                                      cnt, i);
618         }
619
620         show_hunks = make_hunks(sline, cnt, num_parent, dense);
621
622         if (header && (show_hunks || show_empty)) {
623                 shown_header++;
624                 puts(header);
625         }
626         if (show_hunks) {
627                 printf("diff --%s ", dense ? "cc" : "combined");
628                 if (quote_c_style(elem->path, NULL, NULL, 0))
629                         quote_c_style(elem->path, NULL, stdout, 0);
630                 else
631                         printf("%s", elem->path);
632                 putchar('\n');
633                 printf("index ");
634                 for (i = 0; i < num_parent; i++) {
635                         printf("%s%s",
636                                i ? ".." : "",
637                                find_unique_abbrev(elem->parent_sha1[i],
638                                                   DEFAULT_ABBREV));
639                 }
640                 printf("->%s\n",
641                        find_unique_abbrev(elem->sha1, DEFAULT_ABBREV));
642                 dump_sline(sline, cnt, num_parent);
643         }
644         if (ourtmp == ourtmp_buf)
645                 unlink(ourtmp);
646         free(result);
647
648         for (i = 0; i < cnt; i++) {
649                 if (sline[i].lost_head) {
650                         struct lline *ll = sline[i].lost_head;
651                         while (ll) {
652                                 struct lline *tmp = ll;
653                                 ll = ll->next;
654                                 free(tmp);
655                         }
656                 }
657         }
658         free(sline);
659         return shown_header;
660 }
661
662 int diff_tree_combined_merge(const unsigned char *sha1,
663                              const char *header,
664                              int show_empty_merge, int dense)
665 {
666         struct commit *commit = lookup_commit(sha1);
667         struct diff_options diffopts;
668         struct commit_list *parents;
669         struct combine_diff_path *p, *paths = NULL;
670         int num_parent, i, num_paths;
671
672         diff_setup(&diffopts);
673         diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
674         diffopts.recursive = 1;
675
676         /* count parents */
677         for (parents = commit->parents, num_parent = 0;
678              parents;
679              parents = parents->next, num_parent++)
680                 ; /* nothing */
681
682         /* find set of paths that everybody touches */
683         for (parents = commit->parents, i = 0;
684              parents;
685              parents = parents->next, i++) {
686                 struct commit *parent = parents->item;
687                 diff_tree_sha1(parent->object.sha1, commit->object.sha1, "",
688                                &diffopts);
689                 paths = intersect_paths(paths, i, num_parent);
690                 diff_flush(&diffopts);
691         }
692
693         /* find out surviving paths */
694         for (num_paths = 0, p = paths; p; p = p->next) {
695                 if (p->len)
696                         num_paths++;
697         }
698         if (num_paths || show_empty_merge) {
699                 for (p = paths; p; p = p->next) {
700                         if (!p->len)
701                                 continue;
702                         if (show_combined_diff(p, num_parent, dense, header,
703                                                show_empty_merge))
704                                 header = NULL;
705                 }
706         }
707
708         /* Clean things up */
709         while (paths) {
710                 struct combine_diff_path *tmp = paths;
711                 paths = paths->next;
712                 free(tmp);
713         }
714         return 0;
715 }