combine-diff: better hunk splitting.
[git.git] / combine-diff.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "diffcore.h"
5 #include "quote.h"
6
7 struct path_list {
8         struct path_list *next;
9         int len;
10         char *path;
11         unsigned char sha1[20];
12         unsigned char parent_sha1[FLEX_ARRAY][20];
13 };
14
15 static int uninteresting(struct diff_filepair *p)
16 {
17         if (diff_unmodified_pair(p))
18                 return 1;
19         if (!S_ISREG(p->one->mode) || !S_ISREG(p->two->mode))
20                 return 1;
21         return 0;
22 }
23
24 static struct path_list *intersect_paths(struct path_list *curr,
25                                          int n, int num_parent)
26 {
27         struct diff_queue_struct *q = &diff_queued_diff;
28         struct path_list *p;
29         int i;
30
31         if (!n) {
32                 struct path_list *list = NULL, **tail = &list;
33                 for (i = 0; i < q->nr; i++) {
34                         int len;
35                         const char *path;
36                         if (uninteresting(q->queue[i]))
37                                 continue;
38                         path = q->queue[i]->two->path;
39                         len = strlen(path);
40
41                         p = xmalloc(sizeof(*p) + len + 1 + num_parent * 20);
42                         p->path = (char*) &(p->parent_sha1[num_parent][0]);
43                         memcpy(p->path, path, len);
44                         p->path[len] = 0;
45                         p->len = len;
46                         p->next = NULL;
47                         memcpy(p->sha1, q->queue[i]->two->sha1, 20);
48                         memcpy(p->parent_sha1[n], q->queue[i]->one->sha1, 20);
49                         *tail = p;
50                         tail = &p->next;
51                 }
52                 return list;
53         }
54
55         for (p = curr; p; p = p->next) {
56                 int found = 0;
57                 if (!p->len)
58                         continue;
59                 for (i = 0; i < q->nr; i++) {
60                         const char *path;
61                         int len;
62
63                         if (uninteresting(q->queue[i]))
64                                 continue;
65                         path = q->queue[i]->two->path;
66                         len = strlen(path);
67                         if (len == p->len && !memcmp(path, p->path, len)) {
68                                 found = 1;
69                                 memcpy(p->parent_sha1[n],
70                                        q->queue[i]->one->sha1, 20);
71                                 break;
72                         }
73                 }
74                 if (!found)
75                         p->len = 0;
76         }
77         return curr;
78 }
79
80 struct lline {
81         struct lline *next;
82         int len;
83         unsigned long parent_map;
84         char line[FLEX_ARRAY];
85 };
86
87 struct sline {
88         struct lline *lost_head, **lost_tail;
89         char *bol;
90         int len;
91         unsigned long flag;
92 };
93
94 static char *grab_blob(const unsigned char *sha1, unsigned long *size)
95 {
96         char *blob;
97         char type[20];
98         if (!memcmp(sha1, null_sha1, 20)) {
99                 /* deleted blob */
100                 *size = 0;
101                 return xcalloc(1, 1);
102         }
103         blob = read_sha1_file(sha1, type, size);
104         if (strcmp(type, "blob"))
105                 die("object '%s' is not a blob!", sha1_to_hex(sha1));
106         return blob;
107 }
108
109 #define TMPPATHLEN 50
110 #define MAXLINELEN 10240
111
112 static void write_to_temp_file(char *tmpfile, void *blob, unsigned long size)
113 {
114         int fd = git_mkstemp(tmpfile, TMPPATHLEN, ".diff_XXXXXX");
115         if (fd < 0)
116                 die("unable to create temp-file");
117         if (write(fd, blob, size) != size)
118                 die("unable to write temp-file");
119         close(fd);
120 }
121
122 static void write_temp_blob(char *tmpfile, const unsigned char *sha1)
123 {
124         unsigned long size;
125         void *blob;
126         blob = grab_blob(sha1, &size);
127         write_to_temp_file(tmpfile, blob, size);
128         free(blob);
129 }
130
131 static int parse_num(char **cp_p, unsigned int *num_p)
132 {
133         char *cp = *cp_p;
134         unsigned int num = 0;
135         int read_some;
136
137         while ('0' <= *cp && *cp <= '9')
138                 num = num * 10 + *cp++ - '0';
139         if (!(read_some = cp - *cp_p))
140                 return -1;
141         *cp_p = cp;
142         *num_p = num;
143         return 0;
144 }
145
146 static int parse_hunk_header(char *line, int len,
147                              unsigned int *ob, unsigned int *on,
148                              unsigned int *nb, unsigned int *nn)
149 {
150         char *cp;
151         cp = line + 4;
152         if (parse_num(&cp, ob)) {
153         bad_line:
154                 return error("malformed diff output: %s", line);
155         }
156         if (*cp == ',') {
157                 cp++;
158                 if (parse_num(&cp, on))
159                         goto bad_line;
160         }
161         else
162                 *on = 1;
163         if (*cp++ != ' ' || *cp++ != '+')
164                 goto bad_line;
165         if (parse_num(&cp, nb))
166                 goto bad_line;
167         if (*cp == ',') {
168                 cp++;
169                 if (parse_num(&cp, nn))
170                         goto bad_line;
171         }
172         else
173                 *nn = 1;
174         return -!!memcmp(cp, " @@", 3);
175 }
176
177 static void append_lost(struct sline *sline, int n, const char *line)
178 {
179         struct lline *lline;
180         int len = strlen(line);
181         unsigned long this_mask = (1UL<<n);
182         if (line[len-1] == '\n')
183                 len--;
184
185         /* Check to see if we can squash things */
186         if (sline->lost_head) {
187                 struct lline *last_one = NULL;
188                 /* We cannot squash it with earlier one */
189                 for (lline = sline->lost_head;
190                      lline;
191                      lline = lline->next)
192                         if (lline->parent_map & this_mask)
193                                 last_one = lline;
194                 lline = last_one ? last_one->next : sline->lost_head;
195                 while (lline) {
196                         if (lline->len == len &&
197                             !memcmp(lline->line, line, len)) {
198                                 lline->parent_map |= this_mask;
199                                 return;
200                         }
201                         lline = lline->next;
202                 }
203         }
204
205         lline = xmalloc(sizeof(*lline) + len + 1);
206         lline->len = len;
207         lline->next = NULL;
208         lline->parent_map = this_mask;
209         memcpy(lline->line, line, len);
210         lline->line[len] = 0;
211         *sline->lost_tail = lline;
212         sline->lost_tail = &lline->next;
213 }
214
215 static void combine_diff(const unsigned char *parent, const char *ourtmp,
216                          struct sline *sline, int cnt, int n)
217 {
218         FILE *in;
219         char parent_tmp[TMPPATHLEN];
220         char cmd[TMPPATHLEN * 2 + 1024];
221         char line[MAXLINELEN];
222         unsigned int lno, ob, on, nb, nn;
223         unsigned long pmask = ~(1UL << n);
224         struct sline *lost_bucket = NULL;
225
226         write_temp_blob(parent_tmp, parent);
227         sprintf(cmd, "diff --unified=0 -La/x -Lb/x '%s' '%s'",
228                 parent_tmp, ourtmp);
229         in = popen(cmd, "r");
230         if (!in)
231                 return;
232
233         lno = 1;
234         while (fgets(line, sizeof(line), in) != NULL) {
235                 int len = strlen(line);
236                 if (5 < len && !memcmp("@@ -", line, 4)) {
237                         if (parse_hunk_header(line, len,
238                                               &ob, &on, &nb, &nn))
239                                 break;
240                         lno = nb;
241                         if (!nb) {
242                                 /* @@ -1,2 +0,0 @@ to remove the
243                                  * first two lines...
244                                  */
245                                 nb = 1;
246                         }
247                         lost_bucket = &sline[nb-1]; /* sline is 0 based */
248                         continue;
249                 }
250                 if (!lost_bucket)
251                         continue;
252                 switch (line[0]) {
253                 case '-':
254                         append_lost(lost_bucket, n, line+1);
255                         break;
256                 case '+':
257                         sline[lno-1].flag &= pmask;
258                         lno++;
259                         break;
260                 }
261         }
262         fclose(in);
263         unlink(parent_tmp);
264 }
265
266 static unsigned long context = 3;
267 static char combine_marker = '@';
268
269 static int interesting(struct sline *sline, unsigned long all_mask)
270 {
271         return ((sline->flag & all_mask) != all_mask || sline->lost_head);
272 }
273
274 static unsigned long line_common_diff(struct sline *sline, unsigned long all_mask)
275 {
276         /*
277          * Look at the line and see from which parents we have the
278          * same difference.
279          */
280
281         /* Lower bits of sline->flag records if the parent had this
282          * line, so XOR with all_mask gives us on-bits for parents we
283          * have differences with.
284          */
285         unsigned long common_adds = (sline->flag ^ all_mask) & all_mask;
286         unsigned long common_removes = all_mask;
287
288         /* If all the parents have this line, that also counts as
289          * having the same difference.
290          */
291         if (!common_adds)
292                 common_adds = all_mask;
293
294         if (sline->lost_head) {
295                 /* Lost head list records the lines removed from
296                  * the parents, and parent_map records from which
297                  * parent the line was removed.
298                  */
299                 struct lline *ll;
300                 for (ll = sline->lost_head; ll; ll = ll->next) {
301                         common_removes &= ll->parent_map;
302                 }
303         }
304         return common_adds & common_removes;
305 }
306
307 static unsigned long line_all_diff(struct sline *sline, unsigned long all_mask)
308 {
309         /*
310          * Look at the line and see from which parents we have some difference.
311          */
312         unsigned long different = (sline->flag ^ all_mask) & all_mask;
313         if (sline->lost_head) {
314                 /* Lost head list records the lines removed from
315                  * the parents, and parent_map records from which
316                  * parent the line was removed.
317                  */
318                 struct lline *ll;
319                 for (ll = sline->lost_head; ll; ll = ll->next) {
320                         different |= ll->parent_map;
321                 }
322         }
323         return different;
324 }
325
326 static unsigned long adjust_hunk_tail(struct sline *sline,
327                                       unsigned long all_mask,
328                                       unsigned long hunk_begin,
329                                       unsigned long i)
330 {
331         /* i points at the first uninteresting line.
332          * If the last line of the hunk was interesting
333          * only because it has some deletion, then
334          * it is not all that interesting for the
335          * purpose of giving trailing context lines.
336          */
337         if ((hunk_begin + 1 <= i) &&
338             ((sline[i-1].flag & all_mask) == all_mask))
339                 i--;
340         return i;
341 }
342
343 static unsigned long next_interesting(struct sline *sline,
344                                       unsigned long mark,
345                                       unsigned long i,
346                                       unsigned long cnt,
347                                       int uninteresting)
348 {
349         while (i < cnt)
350                 if (uninteresting ?
351                     !(sline[i].flag & mark) :
352                     (sline[i].flag & mark))
353                         return i;
354                 else
355                         i++;
356         return cnt;
357 }
358
359 static int give_context(struct sline *sline, unsigned long cnt, int num_parent)
360 {
361         unsigned long all_mask = (1UL<<num_parent) - 1;
362         unsigned long mark = (1UL<<num_parent);
363         unsigned long i;
364
365         i = next_interesting(sline, mark, 0, cnt, 0);
366         if (cnt <= i)
367                 return 0;
368
369         while (i < cnt) {
370                 unsigned long j = (context < i) ? (i - context) : 0;
371                 unsigned long k;
372                 while (j < i)
373                         sline[j++].flag |= mark;
374
375         again:
376                 j = next_interesting(sline, mark, i, cnt, 1);
377                 if (cnt <= j)
378                         break; /* the rest are all interesting */
379
380                 /* lookahead context lines */
381                 k = next_interesting(sline, mark, j, cnt, 0);
382                 j = adjust_hunk_tail(sline, all_mask, i, j);
383
384                 if (k < j + context) {
385                         /* k is interesting and [j,k) are not, but
386                          * paint them interesting because the gap is small.
387                          */
388                         while (j < k)
389                                 sline[j++].flag |= mark;
390                         i = k;
391                         goto again;
392                 }
393
394                 /* j is the first uninteresting line and there is
395                  * no overlap beyond it within context lines.
396                  */
397                 i = k;
398                 k = (j + context < cnt) ? j + context : cnt;
399                 while (j < k)
400                         sline[j++].flag |= mark;
401         }
402         return 1;
403 }
404
405 static int make_hunks(struct sline *sline, unsigned long cnt,
406                        int num_parent, int dense)
407 {
408         unsigned long all_mask = (1UL<<num_parent) - 1;
409         unsigned long mark = (1UL<<num_parent);
410         unsigned long i;
411         int has_interesting = 0;
412
413         for (i = 0; i < cnt; i++) {
414                 if (interesting(&sline[i], all_mask))
415                         sline[i].flag |= mark;
416                 else
417                         sline[i].flag &= ~mark;
418         }
419         if (!dense)
420                 return give_context(sline, cnt, num_parent);
421
422         /* Look at each hunk, and if we have changes from only one
423          * parent, or the changes are the same from all but one
424          * parent, mark that uninteresting.
425          */
426         i = 0;
427         while (i < cnt) {
428                 unsigned long j, hunk_begin, hunk_end;
429                 int same, diff;
430                 unsigned long same_diff, all_diff;
431                 while (i < cnt && !(sline[i].flag & mark))
432                         i++;
433                 if (cnt <= i)
434                         break; /* No more interesting hunks */
435                 hunk_begin = i;
436                 for (j = i + 1; j < cnt; j++) {
437                         if (!(sline[j].flag & mark)) {
438                                 /* Look beyond the end to see if there
439                                  * is an interesting line after this
440                                  * hunk within context span.
441                                  */
442                                 unsigned long la; /* lookahead */
443                                 int contin = 0;
444                                 la = adjust_hunk_tail(sline, all_mask,
445                                                      hunk_begin, j);
446                                 la = (la + context < cnt) ?
447                                         (la + context) : cnt;
448                                 while (j <= --la) {
449                                         if (sline[la].flag & mark) {
450                                                 contin = 1;
451                                                 break;
452                                         }
453                                 }
454                                 if (!contin)
455                                         break;
456                                 j = la;
457                         }
458                 }
459                 hunk_end = j;
460
461                 /* [i..hunk_end) are interesting.  Now does it have
462                  * the same change with all but one parent?
463                  */
464                 same_diff = all_mask;
465                 all_diff = 0;
466                 for (j = i; j < hunk_end; j++) {
467                         same_diff &= line_common_diff(sline + j, all_mask);
468                         all_diff |= line_all_diff(sline + j, all_mask);
469                 }
470                 diff = same = 0;
471                 for (j = 0; j < num_parent; j++) {
472                         if (same_diff & (1UL<<j))
473                                 same++;
474                         if (all_diff & (1UL<<j))
475                                 diff++;
476                 }
477                 if ((num_parent - 1 <= same) || (diff == 1)) {
478                         /* This hunk is not that interesting after all */
479                         for (j = hunk_begin; j < hunk_end; j++)
480                                 sline[j].flag &= ~mark;
481                 }
482                 i = hunk_end;
483         }
484
485         has_interesting = give_context(sline, cnt, num_parent);
486         return has_interesting;
487 }
488
489 static void dump_sline(struct sline *sline, int cnt, int num_parent)
490 {
491         unsigned long mark = (1UL<<num_parent);
492         int i;
493         int lno = 0;
494
495         while (1) {
496                 struct sline *sl = &sline[lno];
497                 int hunk_end;
498                 while (lno < cnt && !(sline[lno].flag & mark))
499                         lno++;
500                 if (cnt <= lno)
501                         break;
502                 for (hunk_end = lno + 1; hunk_end < cnt; hunk_end++)
503                         if (!(sline[hunk_end].flag & mark))
504                                 break;
505                 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
506                 printf(" +%d,%d ", lno+1, hunk_end-lno);
507                 for (i = 0; i <= num_parent; i++) putchar(combine_marker);
508                 putchar('\n');
509                 while (lno < hunk_end) {
510                         struct lline *ll;
511                         int j;
512                         sl = &sline[lno++];
513                         ll = sl->lost_head;
514                         while (ll) {
515                                 for (j = 0; j < num_parent; j++) {
516                                         if (ll->parent_map & (1UL<<j))
517                                                 putchar('-');
518                                         else
519                                                 putchar(' ');
520                                 }
521                                 puts(ll->line);
522                                 ll = ll->next;
523                         }
524                         for (j = 0; j < num_parent; j++) {
525                                 if ((1UL<<j) & sl->flag)
526                                         putchar(' ');
527                                 else
528                                         putchar('+');
529                         }
530                         printf("%.*s\n", sl->len, sl->bol);
531                 }
532         }
533 }
534
535 static int show_combined_diff(struct path_list *elem, int num_parent,
536                               int dense, const char *header, int show_empty)
537 {
538         unsigned long size, cnt, lno;
539         char *result, *cp, *ep;
540         struct sline *sline; /* survived lines */
541         int i, show_hunks, shown_header = 0;
542         char ourtmp[TMPPATHLEN];
543
544         /* Read the result of merge first */
545         result = grab_blob(elem->sha1, &size);
546         write_to_temp_file(ourtmp, result, size);
547
548         for (cnt = 0, cp = result; cp - result < size; cp++) {
549                 if (*cp == '\n')
550                         cnt++;
551         }
552         if (result[size-1] != '\n')
553                 cnt++; /* incomplete line */
554
555         sline = xcalloc(cnt, sizeof(*sline));
556         ep = result;
557         sline[0].bol = result;
558         for (lno = 0, cp = result; cp - result < size; cp++) {
559                 if (*cp == '\n') {
560                         sline[lno].lost_tail = &sline[lno].lost_head;
561                         sline[lno].len = cp - sline[lno].bol;
562                         sline[lno].flag = (1UL<<num_parent) - 1;
563                         lno++;
564                         if (lno < cnt)
565                                 sline[lno].bol = cp + 1;
566                 }
567         }
568         if (result[size-1] != '\n') {
569                 sline[cnt-1].lost_tail = &sline[cnt-1].lost_head;
570                 sline[cnt-1].len = size - (sline[cnt-1].bol - result);
571                 sline[cnt-1].flag = (1UL<<num_parent) - 1;
572         }
573
574         for (i = 0; i < num_parent; i++)
575                 combine_diff(elem->parent_sha1[i], ourtmp, sline, cnt, i);
576
577         show_hunks = make_hunks(sline, cnt, num_parent, dense);
578
579         if (header && (show_hunks || show_empty)) {
580                 shown_header++;
581                 puts(header);
582         }
583         if (show_hunks) {
584                 printf("diff --%s ", dense ? "cc" : "combined");
585                 if (quote_c_style(elem->path, NULL, NULL, 0))
586                         quote_c_style(elem->path, NULL, stdout, 0);
587                 else
588                         printf("%s", elem->path);
589                 putchar('\n');
590                 dump_sline(sline, cnt, num_parent);
591         }
592         unlink(ourtmp);
593         free(result);
594
595         for (i = 0; i < cnt; i++) {
596                 if (sline[i].lost_head) {
597                         struct lline *ll = sline[i].lost_head;
598                         while (ll) {
599                                 struct lline *tmp = ll;
600                                 ll = ll->next;
601                                 free(tmp);
602                         }
603                 }
604         }
605         free(sline);
606         return shown_header;
607 }
608
609 int diff_tree_combined_merge(const unsigned char *sha1,
610                              const char *header,
611                              int show_empty_merge, int dense)
612 {
613         struct commit *commit = lookup_commit(sha1);
614         struct diff_options diffopts;
615         struct commit_list *parents;
616         struct path_list *p, *paths = NULL;
617         int num_parent, i, num_paths;
618
619         diff_setup(&diffopts);
620         diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
621         diffopts.recursive = 1;
622
623         /* count parents */
624         for (parents = commit->parents, num_parent = 0;
625              parents;
626              parents = parents->next, num_parent++)
627                 ; /* nothing */
628
629         /* find set of paths that everybody touches */
630         for (parents = commit->parents, i = 0;
631              parents;
632              parents = parents->next, i++) {
633                 struct commit *parent = parents->item;
634                 diff_tree_sha1(parent->object.sha1, commit->object.sha1, "",
635                                &diffopts);
636                 paths = intersect_paths(paths, i, num_parent);
637                 diff_flush(&diffopts);
638         }
639
640         /* find out surviving paths */
641         for (num_paths = 0, p = paths; p; p = p->next) {
642                 if (p->len)
643                         num_paths++;
644         }
645         if (num_paths || show_empty_merge) {
646                 for (p = paths; p; p = p->next) {
647                         if (!p->len)
648                                 continue;
649                         if (show_combined_diff(p, num_parent, dense, header,
650                                                show_empty_merge))
651                                 header = NULL;
652                 }
653         }
654
655         /* Clean things up */
656         while (paths) {
657                 struct path_list *tmp = paths;
658                 paths = paths->next;
659                 free(tmp);
660         }
661         return 0;
662 }