diff-tree --cc: denser combined diff output for a merge commit.
[git.git] / combine-diff.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "diffcore.h"
5 #include "quote.h"
6
7 struct path_list {
8         struct path_list *next;
9         int len;
10         char *path;
11         unsigned char sha1[20];
12         unsigned char parent_sha1[FLEX_ARRAY][20];
13 };
14
15 static int uninteresting(struct diff_filepair *p)
16 {
17         if (diff_unmodified_pair(p))
18                 return 1;
19         if (!S_ISREG(p->one->mode) || !S_ISREG(p->two->mode))
20                 return 1;
21         return 0;
22 }
23
24 static struct path_list *intersect_paths(struct path_list *curr,
25                                          int n, int num_parent)
26 {
27         struct diff_queue_struct *q = &diff_queued_diff;
28         struct path_list *p;
29         int i;
30
31         if (!n) {
32                 struct path_list *list = NULL, *tail = NULL;
33                 for (i = 0; i < q->nr; i++) {
34                         int len;
35                         const char *path;
36                         if (uninteresting(q->queue[i]))
37                                 continue;
38                         path = q->queue[i]->two->path;
39                         len = strlen(path);
40
41                         p = xmalloc(sizeof(*p) + len + 1 + num_parent * 20);
42                         p->path = (char*) &(p->parent_sha1[num_parent][0]);
43                         memcpy(p->path, path, len);
44                         p->path[len] = 0;
45                         p->len = len;
46                         p->next = NULL;
47                         memcpy(p->sha1, q->queue[i]->two->sha1, 20);
48                         memcpy(p->parent_sha1[n], q->queue[i]->one->sha1, 20);
49                         if (!tail)
50                                 list = tail = p;
51                         else {
52                                 tail->next = p;
53                                 p = tail;
54                         }
55                 }
56                 return list;
57         }
58
59         for (p = curr; p; p = p->next) {
60                 int found = 0;
61                 if (!p->len)
62                         continue;
63                 for (i = 0; i < q->nr; i++) {
64                         const char *path;
65                         int len;
66
67                         if (uninteresting(q->queue[i]))
68                                 continue;
69                         path = q->queue[i]->two->path;
70                         len = strlen(path);
71                         if (len == p->len && !memcmp(path, p->path, len)) {
72                                 found = 1;
73                                 memcpy(p->parent_sha1[n],
74                                        q->queue[i]->one->sha1, 20);
75                                 break;
76                         }
77                 }
78                 if (!found)
79                         p->len = 0;
80         }
81         return curr;
82 }
83
84 struct lline {
85         struct lline *next;
86         int len;
87         unsigned long parent_map;
88         char line[FLEX_ARRAY];
89 };
90
91 struct sline {
92         struct lline *lost_head, **lost_tail;
93         char *bol;
94         int len;
95         unsigned long flag;
96 };
97
98 static char *grab_blob(const unsigned char *sha1, unsigned long *size)
99 {
100         char *blob;
101         char type[20];
102         if (!memcmp(sha1, null_sha1, 20)) {
103                 /* deleted blob */
104                 *size = 0;
105                 return xcalloc(1, 1);
106         }
107         blob = read_sha1_file(sha1, type, size);
108         if (strcmp(type, "blob"))
109                 die("object '%s' is not a blob!", sha1_to_hex(sha1));
110         return blob;
111 }
112
113 #define TMPPATHLEN 50
114 #define MAXLINELEN 10240
115
116 static void write_to_temp_file(char *tmpfile, void *blob, unsigned long size)
117 {
118         int fd = git_mkstemp(tmpfile, TMPPATHLEN, ".diff_XXXXXX");
119         if (fd < 0)
120                 die("unable to create temp-file");
121         if (write(fd, blob, size) != size)
122                 die("unable to write temp-file");
123         close(fd);
124 }
125
126 static void write_temp_blob(char *tmpfile, const unsigned char *sha1)
127 {
128         unsigned long size;
129         void *blob;
130         blob = grab_blob(sha1, &size);
131         write_to_temp_file(tmpfile, blob, size);
132         free(blob);
133 }
134
135 static int parse_num(char **cp_p, unsigned int *num_p)
136 {
137         char *cp = *cp_p;
138         unsigned int num = 0;
139         int read_some;
140
141         while ('0' <= *cp && *cp <= '9')
142                 num = num * 10 + *cp++ - '0';
143         if (!(read_some = cp - *cp_p))
144                 return -1;
145         *cp_p = cp;
146         *num_p = num;
147         return 0;
148 }
149
150 static int parse_hunk_header(char *line, int len,
151                              unsigned int *ob, unsigned int *on,
152                              unsigned int *nb, unsigned int *nn)
153 {
154         char *cp;
155         cp = line + 4;
156         if (parse_num(&cp, ob)) {
157         bad_line:
158                 return error("malformed diff output: %s", line);
159         }
160         if (*cp == ',') {
161                 cp++;
162                 if (parse_num(&cp, on))
163                         goto bad_line;
164         }
165         else
166                 *on = 1;
167         if (*cp++ != ' ' || *cp++ != '+')
168                 goto bad_line;
169         if (parse_num(&cp, nb))
170                 goto bad_line;
171         if (*cp == ',') {
172                 cp++;
173                 if (parse_num(&cp, nn))
174                         goto bad_line;
175         }
176         else
177                 *nn = 1;
178         return -!!memcmp(cp, " @@", 3);
179 }
180
181 static void append_lost(struct sline *sline, int n, const char *line)
182 {
183         struct lline *lline;
184         int len = strlen(line);
185         unsigned long this_mask = (1UL<<n);
186         if (line[len-1] == '\n')
187                 len--;
188
189         /* Check to see if we can squash things */
190         if (sline->lost_head) {
191                 struct lline *last_one = NULL;
192                 /* We cannot squash it with earlier one */
193                 for (lline = sline->lost_head;
194                      lline;
195                      lline = lline->next)
196                         if (lline->parent_map & this_mask)
197                                 last_one = lline;
198                 lline = last_one ? last_one->next : sline->lost_head;
199                 while (lline) {
200                         if (lline->len == len &&
201                             !memcmp(lline->line, line, len)) {
202                                 lline->parent_map |= this_mask;
203                                 return;
204                         }
205                         lline = lline->next;
206                 }
207         }
208
209         lline = xmalloc(sizeof(*lline) + len + 1);
210         lline->len = len;
211         lline->next = NULL;
212         lline->parent_map = this_mask;
213         memcpy(lline->line, line, len);
214         lline->line[len] = 0;
215         if (sline->lost_head)
216                 *(sline->lost_tail) = lline;
217         else
218                 sline->lost_head = lline;
219         sline->lost_tail = &lline->next;
220 }
221
222 static void combine_diff(const unsigned char *parent, const char *ourtmp,
223                          struct sline *sline, int cnt, int n)
224 {
225         FILE *in;
226         char parent_tmp[TMPPATHLEN];
227         char cmd[TMPPATHLEN * 2 + 1024];
228         char line[MAXLINELEN];
229         unsigned int lno, ob, on, nb, nn;
230         unsigned long pmask = ~(1UL << n);
231         struct sline *lost_bucket = NULL;
232
233         write_temp_blob(parent_tmp, parent);
234         sprintf(cmd, "diff --unified=0 -La/x -Lb/x '%s' '%s'",
235                 parent_tmp, ourtmp);
236         in = popen(cmd, "r");
237         if (!in)
238                 return;
239
240         lno = 1;
241         while (fgets(line, sizeof(line), in) != NULL) {
242                 int len = strlen(line);
243                 if (5 < len && !memcmp("@@ -", line, 4)) {
244                         if (parse_hunk_header(line, len,
245                                               &ob, &on, &nb, &nn))
246                                 break;
247                         lno = nb;
248                         if (!nb) {
249                                 /* @@ -1,2 +0,0 @@ to remove the
250                                  * first two lines...
251                                  */
252                                 nb = 1;
253                         }
254                         lost_bucket = &sline[nb-1]; /* sline is 0 based */
255                         continue;
256                 }
257                 if (!lost_bucket)
258                         continue;
259                 switch (line[0]) {
260                 case '-':
261                         append_lost(lost_bucket, n, line+1);
262                         break;
263                 case '+':
264                         sline[lno-1].flag &= pmask;
265                         lno++;
266                         break;
267                 }
268         }
269         fclose(in);
270         unlink(parent_tmp);
271 }
272
273 static unsigned long context = 3;
274 static char combine_marker = '@';
275
276 static int interesting(struct sline *sline, unsigned long all_mask)
277 {
278         return ((sline->flag & all_mask) != all_mask || sline->lost_head);
279 }
280
281 static unsigned long line_diff_parents(struct sline *sline, unsigned long all_mask)
282 {
283         /*
284          * Look at the line and see from which parents we have difference.
285          * Lower bits of sline->flag records if the parent had this line,
286          * so XOR with all_mask gives us on-bits for parents we have
287          * differences with.
288          */
289         unsigned long parents = (sline->flag ^ all_mask);
290         if (sline->lost_head) {
291                 struct lline *ll;
292                 for (ll = sline->lost_head; ll; ll = ll->next)
293                         parents |= ll->parent_map;
294         }
295         return parents & all_mask;
296 }
297
298 static void make_hunks(struct sline *sline, unsigned long cnt,
299                        int num_parent, int dense)
300 {
301         unsigned long all_mask = (1UL<<num_parent) - 1;
302         unsigned long mark = (1UL<<num_parent);
303         unsigned long i;
304
305         i = 0;
306         while (i < cnt) {
307                 if (interesting(&sline[i], all_mask)) {
308                         unsigned long j = (context < i) ? i - context : 0;
309                         while (j <= i)
310                                 sline[j++].flag |= mark;
311                         while (++i < cnt) {
312                                 if (!interesting(&sline[i], all_mask))
313                                         break;
314                                 sline[i].flag |= mark;
315                         }
316                         j = (i + context < cnt) ? i + context : cnt;
317                         while (i < j)
318                                 sline[i++].flag |= mark;
319                         continue;
320                 }
321                 i++;
322         }
323         if (!dense)
324                 return;
325
326         /* Look at each hunk, and if it contains changes from only
327          * one parent, mark that uninteresting.
328          */
329         i = 0;
330         while (i < cnt) {
331                 int j, hunk_end, diffs;
332                 unsigned long parents;
333                 while (i < cnt && !(sline[i].flag & mark))
334                         i++;
335                 if (cnt <= i)
336                         break; /* No more interesting hunks */
337                 for (hunk_end = i + 1; hunk_end < cnt; hunk_end++)
338                         if (!(sline[hunk_end].flag & mark))
339                                 break;
340                 /* [i..hunk_end) are interesting.  Now is it from
341                  * only one parent?
342                  * If lost lines are only from one parent and
343                  * remaining lines existed in parents other than
344                  * that parent, then the hunk is not that interesting.
345                  */
346                 parents = 0;
347                 diffs = 0;
348                 for (j = i; j < hunk_end; j++)
349                         parents |= line_diff_parents(sline + j, all_mask);
350                 /* Now, how many bits from [0..num_parent) are on? */
351                 for (j = 0; j < num_parent; j++) {
352                         if (parents & (1UL<<j))
353                                 diffs++;
354                 }
355                 if (diffs < 2) {
356                         /* This hunk is not that interesting after all */
357                         for (j = i; j < hunk_end; j++)
358                                 sline[j].flag &= ~mark;
359                 }
360                 i = hunk_end;
361         }
362 }
363
364 static void dump_sline(struct sline *sline, int cnt, int num_parent)
365 {
366         unsigned long mark = (1UL<<num_parent);
367         int i;
368         int lno = 0;
369
370         while (1) {
371                 struct sline *sl = &sline[lno];
372                 int hunk_end;
373                 while (lno < cnt && !(sline[lno].flag & mark))
374                         lno++;
375                 if (cnt <= lno)
376                         break;
377                 for (hunk_end = lno + 1; hunk_end < cnt; hunk_end++)
378                         if (!(sline[hunk_end].flag & mark))
379                                 break;
380                 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
381                 printf(" +%d,%d ", lno+1, hunk_end-lno);
382                 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
383                 putchar('\n');
384                 while (lno < hunk_end) {
385                         struct lline *ll;
386                         int j;
387                         sl = &sline[lno++];
388                         ll = sl->lost_head;
389                         while (ll) {
390                                 for (j = 0; j < num_parent; j++) {
391                                         if (ll->parent_map & (1UL<<j))
392                                                 putchar('-');
393                                         else
394                                                 putchar(' ');
395                                 }
396                                 putchar(' ');
397                                 puts(ll->line);
398                                 ll = ll->next;
399                         }
400                         for (j = 0; j < num_parent; j++) {
401                                 if ((1UL<<j) & sl->flag)
402                                         putchar(' ');
403                                 else
404                                         putchar('+');
405                         }
406                         printf(" %.*s\n", sl->len, sl->bol);
407                 }
408         }
409 }
410
411 static void show_combined_diff(struct path_list *elem, int num_parent,
412                                int dense)
413 {
414         unsigned long size, cnt, lno;
415         char *result, *cp, *ep;
416         struct sline *sline; /* survived lines */
417         int i;
418         char ourtmp[TMPPATHLEN];
419
420         /* Read the result of merge first */
421         result = grab_blob(elem->sha1, &size);
422         write_to_temp_file(ourtmp, result, size);
423
424         for (cnt = 0, cp = result; cp - result < size; cp++) {
425                 if (*cp == '\n')
426                         cnt++;
427         }
428         if (result[size-1] != '\n')
429                 cnt++; /* incomplete line */
430
431         sline = xcalloc(cnt, sizeof(*sline));
432         ep = result;
433         sline[0].bol = result;
434         for (lno = 0, cp = result; cp - result < size; cp++) {
435                 if (*cp == '\n') {
436                         sline[lno].len = cp - sline[lno].bol;
437                         sline[lno].flag = (1UL<<num_parent) - 1;
438                         lno++;
439                         if (lno < cnt)
440                                 sline[lno].bol = cp + 1;
441                 }
442         }
443         if (result[size-1] != '\n') {
444                 sline[cnt-1].len = size - (sline[cnt-1].bol - result);
445                 sline[cnt-1].flag = (1UL<<num_parent) - 1;
446         }
447
448         for (i = 0; i < num_parent; i++)
449                 combine_diff(elem->parent_sha1[i], ourtmp, sline, cnt, i);
450
451         make_hunks(sline, cnt, num_parent, dense);
452
453         dump_sline(sline, cnt, num_parent);
454         unlink(ourtmp);
455         free(result);
456
457         for (i = 0; i < cnt; i++) {
458                 if (sline[i].lost_head) {
459                         struct lline *ll = sline[i].lost_head;
460                         while (ll) {
461                                 struct lline *tmp = ll;
462                                 ll = ll->next;
463                                 free(tmp);
464                         }
465                 }
466         }
467         free(sline);
468 }
469
470 int diff_tree_combined_merge(const unsigned char *sha1,
471                              const char *header,
472                              int show_empty_merge, int dense)
473 {
474         struct commit *commit = lookup_commit(sha1);
475         struct diff_options diffopts;
476         struct commit_list *parents;
477         struct path_list *p, *paths = NULL;
478         int num_parent, i, num_paths;
479
480         diff_setup(&diffopts);
481         diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
482         diffopts.recursive = 1;
483
484         /* count parents */
485         for (parents = commit->parents, num_parent = 0;
486              parents;
487              parents = parents->next, num_parent++)
488                 ; /* nothing */
489
490         /* find set of paths that everybody touches */
491         for (parents = commit->parents, i = 0;
492              parents;
493              parents = parents->next, i++) {
494                 struct commit *parent = parents->item;
495                 diff_tree_sha1(parent->object.sha1, commit->object.sha1, "",
496                                &diffopts);
497                 paths = intersect_paths(paths, i, num_parent);
498                 diff_flush(&diffopts);
499         }
500
501         /* find out surviving paths */
502         for (num_paths = 0, p = paths; p; p = p->next) {
503                 if (p->len)
504                         num_paths++;
505         }
506         if (num_paths || show_empty_merge) {
507                 puts(header);
508                 for (p = paths; p; p = p->next) {
509                         if (!p->len)
510                                 continue;
511                         printf("diff --combined ");
512                         if (quote_c_style(p->path, NULL, NULL, 0))
513                                 quote_c_style(p->path, NULL, stdout, 0);
514                         else
515                                 printf("%s", p->path);
516                         putchar('\n');
517                         show_combined_diff(p, num_parent, dense);
518                 }
519         }
520
521         /* Clean things up */
522         while (paths) {
523                 struct path_list *tmp = paths;
524                 paths = paths->next;
525                 free(tmp);
526         }
527         return 0;
528 }