Const tightening.
[git.git] / blame.c
1 /*
2  * Copyright (C) 2006, Fredrik Kuivinen <freku045@student.liu.se>
3  */
4
5 #include <assert.h>
6 #include <time.h>
7 #include <sys/time.h>
8
9 #include "cache.h"
10 #include "refs.h"
11 #include "tag.h"
12 #include "commit.h"
13 #include "tree.h"
14 #include "blob.h"
15 #include "diff.h"
16 #include "revision.h"
17
18 #define DEBUG 0
19
/*
 * Result table of the blame run: blame_lines[i] is the commit that line i
 * of the starting blob has been attributed to, or NULL while the line is
 * still unattributed.  Allocated by process_commits().
 */
struct commit **blame_lines;

/* Number of entries in blame_lines (line count of the starting blob). */
int num_blame_lines;
22
/*
 * Per-commit bookkeeping, hung off commit->object.util by fill_util_info().
 */
struct util_info {
        /*
         * line_map[i] is the index into blame_lines that line i of this
         * commit's blob corresponds to, or -1 when no correspondence is
         * known.  NULL until alloc_line_map() has run.
         */
        int *line_map;

        /* blob sha, not commit! */
        unsigned char sha1[20];

        /* Blob contents; NULL until get_blob() has loaded them. */
        char *buf;

        /* Number of bytes in buf. */
        unsigned long size;

        /* Number of lines in buf; -1 until alloc_line_map() has run. */
        int num_lines;
};
31
/*
 * One hunk of a "diff -u0" run as stored by get_patch(): a start offset
 * and a line count for each side of the diff.
 */
struct chunk {
        int off1;       /* --- side: start offset after get_patch()'s adjustment */
        int len1;       /* --- side: number of lines */
        int off2;       /* +++ side: start offset after get_patch()'s adjustment */
        int len2;       /* +++ side: number of lines */
};
36
/*
 * The hunks found between two blobs, in the order diff printed them.
 * Built by get_patch() and released with free_patch().
 */
struct patch {
        struct chunk *chunks;   /* array of num hunks; NULL when num is 0 */
        int num;                /* number of entries in chunks */
};
41
42 static void get_blob(struct commit *commit);
43
/*
 * Only used for statistics (printed by main() when DEBUG is set).
 * Objects with static storage duration start out as zero.
 */
static int num_get_patch;       /* number of completed get_patch() calls */
static int num_commits;         /* commits examined by process_commits() */
static int patch_time;          /* microseconds accumulated inside get_patch() */
48
49 #define TEMPFILE_PATH_LEN 60
50 static struct patch *get_patch(struct commit *commit, struct commit *other)
51 {
52         struct patch *ret;
53         struct util_info *info_c = (struct util_info *)commit->object.util;
54         struct util_info *info_o = (struct util_info *)other->object.util;
55         char tmp_path1[TEMPFILE_PATH_LEN], tmp_path2[TEMPFILE_PATH_LEN];
56         char diff_cmd[TEMPFILE_PATH_LEN*2 + 20];
57         struct timeval tv_start, tv_end;
58         int fd;
59         FILE *fin;
60         char buf[1024];
61
62         ret = xmalloc(sizeof(struct patch));
63         ret->chunks = NULL;
64         ret->num = 0;
65
66         get_blob(commit);
67         get_blob(other);
68
69         gettimeofday(&tv_start, NULL);
70
71         fd = git_mkstemp(tmp_path1, TEMPFILE_PATH_LEN, "git-blame-XXXXXX");
72         if (fd < 0)
73                 die("unable to create temp-file: %s", strerror(errno));
74
75         if (xwrite(fd, info_c->buf, info_c->size) != info_c->size)
76                 die("write failed: %s", strerror(errno));
77         close(fd);
78
79         fd = git_mkstemp(tmp_path2, TEMPFILE_PATH_LEN, "git-blame-XXXXXX");
80         if (fd < 0)
81                 die("unable to create temp-file: %s", strerror(errno));
82
83         if (xwrite(fd, info_o->buf, info_o->size) != info_o->size)
84                 die("write failed: %s", strerror(errno));
85         close(fd);
86
87         sprintf(diff_cmd, "diff -u0 %s %s", tmp_path1, tmp_path2);
88         fin = popen(diff_cmd, "r");
89         if (!fin)
90                 die("popen failed: %s", strerror(errno));
91
92         while (fgets(buf, sizeof(buf), fin)) {
93                 struct chunk *chunk;
94                 char *start, *sp;
95
96                 if (buf[0] != '@' || buf[1] != '@')
97                         continue;
98
99                 if (DEBUG)
100                         printf("chunk line: %s", buf);
101                 ret->num++;
102                 ret->chunks = xrealloc(ret->chunks,
103                                        sizeof(struct chunk) * ret->num);
104                 chunk = &ret->chunks[ret->num - 1];
105
106                 assert(!strncmp(buf, "@@ -", 4));
107
108                 start = buf + 4;
109                 sp = index(start, ' ');
110                 *sp = '\0';
111                 if (index(start, ',')) {
112                         int ret =
113                             sscanf(start, "%d,%d", &chunk->off1, &chunk->len1);
114                         assert(ret == 2);
115                 } else {
116                         int ret = sscanf(start, "%d", &chunk->off1);
117                         assert(ret == 1);
118                         chunk->len1 = 1;
119                 }
120                 *sp = ' ';
121
122                 start = sp + 1;
123                 sp = index(start, ' ');
124                 *sp = '\0';
125                 if (index(start, ',')) {
126                         int ret =
127                             sscanf(start, "%d,%d", &chunk->off2, &chunk->len2);
128                         assert(ret == 2);
129                 } else {
130                         int ret = sscanf(start, "%d", &chunk->off2);
131                         assert(ret == 1);
132                         chunk->len2 = 1;
133                 }
134                 *sp = ' ';
135
136                 if (chunk->len1 == 0)
137                         chunk->off1++;
138                 if (chunk->len2 == 0)
139                         chunk->off2++;
140
141                 if (chunk->off1 > 0)
142                         chunk->off1--;
143                 if (chunk->off2 > 0)
144                         chunk->off2--;
145
146                 assert(chunk->off1 >= 0);
147                 assert(chunk->off2 >= 0);
148         }
149         pclose(fin);
150         unlink(tmp_path1);
151         unlink(tmp_path2);
152
153         gettimeofday(&tv_end, NULL);
154         patch_time += 1000000 * (tv_end.tv_sec - tv_start.tv_sec) +
155                 tv_end.tv_usec - tv_start.tv_usec;
156
157         num_get_patch++;
158         return ret;
159 }
160
161 static void free_patch(struct patch *p)
162 {
163         free(p->chunks);
164         free(p);
165 }
166
167 static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
168                                   int baselen, const char *pathname,
169                                   unsigned mode, int stage);
170
171 static unsigned char blob_sha1[20];
172 static int get_blob_sha1(struct tree *t, const char *pathname,
173                          unsigned char *sha1)
174 {
175         int i;
176         const char *pathspec[2];
177         pathspec[0] = pathname;
178         pathspec[1] = NULL;
179         memset(blob_sha1, 0, sizeof(blob_sha1));
180         read_tree_recursive(t, "", 0, 0, pathspec, get_blob_sha1_internal);
181
182         for (i = 0; i < 20; i++) {
183                 if (blob_sha1[i] != 0)
184                         break;
185         }
186
187         if (i == 20)
188                 return -1;
189
190         memcpy(sha1, blob_sha1, 20);
191         return 0;
192 }
193
194 static int get_blob_sha1_internal(unsigned char *sha1, const char *base,
195                                   int baselen, const char *pathname,
196                                   unsigned mode, int stage)
197 {
198         if (S_ISDIR(mode))
199                 return READ_TREE_RECURSIVE;
200
201         memcpy(blob_sha1, sha1, 20);
202         return -1;
203 }
204
205 static void get_blob(struct commit *commit)
206 {
207         struct util_info *info = commit->object.util;
208         char type[20];
209
210         if (info->buf)
211                 return;
212
213         info->buf = read_sha1_file(info->sha1, type, &info->size);
214
215         assert(!strcmp(type, "blob"));
216 }
217
218 /* For debugging only */
219 static void print_patch(struct patch *p)
220 {
221         int i;
222         printf("Num chunks: %d\n", p->num);
223         for (i = 0; i < p->num; i++) {
224                 printf("%d,%d %d,%d\n", p->chunks[i].off1, p->chunks[i].len1,
225                        p->chunks[i].off2, p->chunks[i].len2);
226         }
227 }
228
#if 0
/*
 * For debugging only (compiled out): print the line maps of two commits
 * side by side, one row per line index, and mark rows where both maps
 * have an entry but the entries differ with "---".
 */
static void print_map(struct commit *cmit, struct commit *other)
{
        struct util_info *left = cmit->object.util;
        struct util_info *right = other->object.util;
        int rows = left->num_lines;
        int row;

        /* Cover the longer of the two maps. */
        if (right->num_lines >= rows)
                rows = right->num_lines;

        for (row = 0; row < rows; row++) {
                int lhs = -1;

                printf("i: %d ", row);

                if (row < left->num_lines) {
                        lhs = left->line_map[row];
                        printf("%d\t", lhs);
                } else {
                        printf("\t");
                }

                if (row >= right->num_lines) {
                        printf("\t");
                } else {
                        int rhs = right->line_map[row];

                        printf("%d\t", rhs);
                        if (lhs != -1 && rhs != lhs)
                                printf("---");
                }

                printf("\n");
        }
}
#endif
264
265 // p is a patch from commit to other.
266 static void fill_line_map(struct commit *commit, struct commit *other,
267                           struct patch *p)
268 {
269         struct util_info *util = commit->object.util;
270         struct util_info *util2 = other->object.util;
271         int *map = util->line_map;
272         int *map2 = util2->line_map;
273         int cur_chunk = 0;
274         int i1, i2;
275
276         if (p->num && DEBUG)
277                 print_patch(p);
278
279         if (DEBUG)
280                 printf("num lines 1: %d num lines 2: %d\n", util->num_lines,
281                        util2->num_lines);
282
283         for (i1 = 0, i2 = 0; i1 < util->num_lines; i1++, i2++) {
284                 struct chunk *chunk = NULL;
285                 if (cur_chunk < p->num)
286                         chunk = &p->chunks[cur_chunk];
287
288                 if (chunk && chunk->off1 == i1) {
289                         if (DEBUG && i2 != chunk->off2)
290                                 printf("i2: %d off2: %d\n", i2, chunk->off2);
291
292                         assert(i2 == chunk->off2);
293
294                         i1--;
295                         i2--;
296                         if (chunk->len1 > 0)
297                                 i1 += chunk->len1;
298
299                         if (chunk->len2 > 0)
300                                 i2 += chunk->len2;
301
302                         cur_chunk++;
303                 } else {
304                         if (i2 >= util2->num_lines)
305                                 break;
306
307                         if (map[i1] != map2[i2] && map[i1] != -1) {
308                                 if (DEBUG)
309                                         printf("map: i1: %d %d %p i2: %d %d %p\n",
310                                                i1, map[i1],
311                                                i1 != -1 ? blame_lines[map[i1]] : NULL,
312                                                i2, map2[i2],
313                                                i2 != -1 ? blame_lines[map2[i2]] : NULL);
314                                 if (map2[i2] != -1 &&
315                                     blame_lines[map[i1]] &&
316                                     !blame_lines[map2[i2]])
317                                         map[i1] = map2[i2];
318                         }
319
320                         if (map[i1] == -1 && map2[i2] != -1)
321                                 map[i1] = map2[i2];
322                 }
323
324                 if (DEBUG > 1)
325                         printf("l1: %d l2: %d i1: %d i2: %d\n",
326                                map[i1], map2[i2], i1, i2);
327         }
328 }
329
330 static int map_line(struct commit *commit, int line)
331 {
332         struct util_info *info = commit->object.util;
333         assert(line >= 0 && line < info->num_lines);
334         return info->line_map[line];
335 }
336
337 static int fill_util_info(struct commit *commit, const char *path)
338 {
339         struct util_info *util;
340         if (commit->object.util)
341                 return 0;
342
343         util = xmalloc(sizeof(struct util_info));
344
345         if (get_blob_sha1(commit->tree, path, util->sha1)) {
346                 free(util);
347                 return 1;
348         } else {
349                 util->buf = NULL;
350                 util->size = 0;
351                 util->line_map = NULL;
352                 util->num_lines = -1;
353                 commit->object.util = util;
354                 return 0;
355         }
356 }
357
358 static void alloc_line_map(struct commit *commit)
359 {
360         struct util_info *util = commit->object.util;
361         int i;
362
363         if (util->line_map)
364                 return;
365
366         get_blob(commit);
367
368         util->num_lines = 0;
369         for (i = 0; i < util->size; i++) {
370                 if (util->buf[i] == '\n')
371                         util->num_lines++;
372         }
373         if(util->buf[util->size - 1] != '\n')
374                 util->num_lines++;
375
376         util->line_map = xmalloc(sizeof(int) * util->num_lines);
377
378         for (i = 0; i < util->num_lines; i++)
379                 util->line_map[i] = -1;
380 }
381
382 static void init_first_commit(struct commit* commit, const char* filename)
383 {
384         struct util_info* util;
385         int i;
386
387         if (fill_util_info(commit, filename))
388                 die("fill_util_info failed");
389
390         alloc_line_map(commit);
391
392         util = commit->object.util;
393         num_blame_lines = util->num_lines;
394
395         for (i = 0; i < num_blame_lines; i++)
396                 util->line_map[i] = i;
397 }
398
399
400 static void process_commits(struct rev_info *rev, const char *path,
401                             struct commit** initial)
402 {
403         int i;
404         struct util_info* util;
405         int lines_left;
406         int *blame_p;
407         int *new_lines;
408         int new_lines_len;
409
410         struct commit* commit = get_revision(rev);
411         assert(commit);
412         init_first_commit(commit, path);
413
414         util = commit->object.util;
415         num_blame_lines = util->num_lines;
416         blame_lines = xmalloc(sizeof(struct commit *) * num_blame_lines);
417         for (i = 0; i < num_blame_lines; i++)
418                 blame_lines[i] = NULL;
419
420         lines_left = num_blame_lines;
421         blame_p = xmalloc(sizeof(int) * num_blame_lines);
422         new_lines = xmalloc(sizeof(int) * num_blame_lines);
423         do {
424                 struct commit_list *parents;
425                 int num_parents;
426                 struct util_info *util;
427
428                 if (DEBUG)
429                         printf("\nProcessing commit: %d %s\n", num_commits,
430                                sha1_to_hex(commit->object.sha1));
431
432                 if (lines_left == 0)
433                         return;
434
435                 num_commits++;
436                 memset(blame_p, 0, sizeof(int) * num_blame_lines);
437                 new_lines_len = 0;
438                 num_parents = 0;
439                 for (parents = commit->parents;
440                      parents != NULL; parents = parents->next)
441                         num_parents++;
442
443                 if(num_parents == 0)
444                         *initial = commit;
445
446                 if(fill_util_info(commit, path))
447                         continue;
448
449                 alloc_line_map(commit);
450                 util = commit->object.util;
451
452                 for (parents = commit->parents;
453                      parents != NULL; parents = parents->next) {
454                         struct commit *parent = parents->item;
455                         struct patch *patch;
456
457                         if (parse_commit(parent) < 0)
458                                 die("parse_commit error");
459
460                         if (DEBUG)
461                                 printf("parent: %s\n",
462                                        sha1_to_hex(parent->object.sha1));
463
464                         if(fill_util_info(parent, path)) {
465                                 num_parents--;
466                                 continue;
467                         }
468
469                         patch = get_patch(parent, commit);
470                         alloc_line_map(parent);
471                         fill_line_map(parent, commit, patch);
472
473                         for (i = 0; i < patch->num; i++) {
474                             int l;
475                             for (l = 0; l < patch->chunks[i].len2; l++) {
476                                 int mapped_line =
477                                     map_line(commit, patch->chunks[i].off2 + l);
478                                 if (mapped_line != -1) {
479                                     blame_p[mapped_line]++;
480                                     if (blame_p[mapped_line] == num_parents)
481                                         new_lines[new_lines_len++] = mapped_line;
482                                 }
483                             }
484                         }
485                         free_patch(patch);
486                 }
487
488                 if (DEBUG)
489                         printf("parents: %d\n", num_parents);
490
491                 for (i = 0; i < new_lines_len; i++) {
492                         int mapped_line = new_lines[i];
493                         if (blame_lines[mapped_line] == NULL) {
494                                 blame_lines[mapped_line] = commit;
495                                 lines_left--;
496                                 if (DEBUG)
497                                         printf("blame: mapped: %d i: %d\n",
498                                                mapped_line, i);
499                         }
500                 }
501         } while ((commit = get_revision(rev)) != NULL);
502 }
503
504 int main(int argc, const char **argv)
505 {
506         int i;
507         struct commit *initial = NULL;
508         unsigned char sha1[20];
509         const char* filename;
510         int num_args;
511         const char* args[10];
512         struct rev_info rev;
513
514         setup_git_directory();
515
516         if (argc != 3)
517                 die("Usage: blame commit-ish file");
518
519
520         filename = argv[2];
521
522         {
523                 struct commit* commit;
524                 if (get_sha1(argv[1], sha1))
525                         die("get_sha1 failed");
526                 commit = lookup_commit_reference(sha1);
527
528                 if (fill_util_info(commit, filename)) {
529                         printf("%s not found in %s\n", filename, argv[1]);
530                         return 1;
531                 }
532         }
533
534         num_args = 0;
535         args[num_args++] = NULL;
536         args[num_args++] = "--topo-order";
537         args[num_args++] = "--remove-empty";
538         args[num_args++] = argv[1];
539         args[num_args++] = "--";
540         args[num_args++] = filename;
541         args[num_args] = NULL;
542
543         setup_revisions(num_args, args, &rev, "HEAD");
544         prepare_revision_walk(&rev);
545         process_commits(&rev, filename, &initial);
546
547         for (i = 0; i < num_blame_lines; i++) {
548                 struct commit *c = blame_lines[i];
549                 if (!c)
550                         c = initial;
551
552                 printf("%d %.8s\n", i, sha1_to_hex(c->object.sha1));
553 // printf("%d %s\n", i, find_unique_abbrev(blame_lines[i]->object.sha1, 6));
554         }
555
556         if (DEBUG) {
557                 printf("num get patch: %d\n", num_get_patch);
558                 printf("num commits: %d\n", num_commits);
559                 printf("patch time: %f\n", patch_time / 1000000.0);
560                 printf("initial: %s\n", sha1_to_hex(initial->object.sha1));
561         }
562
563         return 0;
564 }