Merge fighting fsck-cache updates from Junio
[git.git] / fsck-cache.c
1 #include <sys/types.h>
2 #include <dirent.h>
3
4 #include "cache.h"
5 #include "commit.h"
6 #include "tree.h"
7 #include "blob.h"
8 #include "tag.h"
9
/*
 * Flag bit handed to mark_reachable() and later tested in obj->flags
 * when deciding whether to report an object as unreachable.
 */
#define REACHABLE 0x0001

/* Command-line switches (file-scope statics start out as zero). */
static int show_root;		/* --root: print "root <sha1>" for parentless commits */
static int show_tags;		/* --tags: print what every tag object points at */
static int show_unreachable;	/* --unreachable: report objects without REACHABLE */
static int standalone;		/* --standalone: never fall back to has_sha1_file() */
static int check_full;		/* --full: also check alternate object dirs and packs */
static int keep_cache_objects;	/* --cache: treat blobs named by the index as heads */

/* Scratch buffer holding the head sha1 currently being parsed from argv. */
static unsigned char head_sha1[20];
19
20 static void check_connectivity(void)
21 {
22         int i;
23
24         /* Look up all the requirements, warn about missing objects.. */
25         for (i = 0; i < nr_objs; i++) {
26                 struct object *obj = objs[i];
27                 struct object_list *refs;
28
29                 if (!obj->parsed) {
30                         if (!standalone && has_sha1_file(obj->sha1))
31                                 ; /* it is in pack */
32                         else
33                                 printf("missing %s %s\n",
34                                        obj->type, sha1_to_hex(obj->sha1));
35                         continue;
36                 }
37
38                 for (refs = obj->refs; refs; refs = refs->next) {
39                         if (refs->item->parsed ||
40                             (!standalone && has_sha1_file(refs->item->sha1)))
41                                 continue;
42                         printf("broken link from %7s %s\n",
43                                obj->type, sha1_to_hex(obj->sha1));
44                         printf("              to %7s %s\n",
45                                refs->item->type, sha1_to_hex(refs->item->sha1));
46                 }
47
48                 if (show_unreachable && !(obj->flags & REACHABLE)) {
49                         printf("unreachable %s %s\n",
50                                obj->type, sha1_to_hex(obj->sha1));
51                         continue;
52                 }
53
54                 if (!obj->used) {
55                         printf("dangling %s %s\n", obj->type, 
56                                sha1_to_hex(obj->sha1));
57                 }
58         }
59 }
60
61 /*
62  * The entries in a tree are ordered in the _path_ order,
63  * which means that a directory entry is ordered by adding
64  * a slash to the end of it.
65  *
66  * So a directory called "a" is ordered _after_ a file
67  * called "a.c", because "a/" sorts after "a.c".
68  */
69 #define TREE_UNORDERED (-1)
70 #define TREE_HAS_DUPS  (-2)
71
72 static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
73 {
74         int len1 = strlen(a->name);
75         int len2 = strlen(b->name);
76         int len = len1 < len2 ? len1 : len2;
77         unsigned char c1, c2;
78         int cmp;
79
80         cmp = memcmp(a->name, b->name, len);
81         if (cmp < 0)
82                 return 0;
83         if (cmp > 0)
84                 return TREE_UNORDERED;
85
86         /*
87          * Ok, the first <len> characters are the same.
88          * Now we need to order the next one, but turn
89          * a '\0' into a '/' for a directory entry.
90          */
91         c1 = a->name[len];
92         c2 = b->name[len];
93         if (!c1 && !c2)
94                 /*
95                  * git-write-tree used to write out a nonsense tree that has
96                  * entries with the same name, one blob and one tree.  Make
97                  * sure we do not have duplicate entries.
98                  */
99                 return TREE_HAS_DUPS;
100         if (!c1 && a->directory)
101                 c1 = '/';
102         if (!c2 && b->directory)
103                 c2 = '/';
104         return c1 < c2 ? 0 : TREE_UNORDERED;
105 }
106
107 static int fsck_tree(struct tree *item)
108 {
109         int has_full_path = 0;
110         struct tree_entry_list *entry, *last;
111
112         last = NULL;
113         for (entry = item->entries; entry; entry = entry->next) {
114                 if (strchr(entry->name, '/'))
115                         has_full_path = 1;
116
117                 switch (entry->mode) {
118                 /*
119                  * Standard modes.. 
120                  */
121                 case S_IFREG | 0755:
122                 case S_IFREG | 0644:
123                 case S_IFLNK:
124                 case S_IFDIR:
125                         break;
126                 /*
127                  * This is nonstandard, but we had a few of these
128                  * early on when we honored the full set of mode
129                  * bits..
130                  */
131                 case S_IFREG | 0664:
132                         break;
133                 default:
134                         printf("tree %s has entry %o %s\n",
135                                 sha1_to_hex(item->object.sha1),
136                                 entry->mode, entry->name);
137                 }
138
139                 if (last) {
140                         switch (verify_ordered(last, entry)) {
141                         case TREE_UNORDERED:
142                                 fprintf(stderr, "tree %s not ordered\n",
143                                         sha1_to_hex(item->object.sha1));
144                                 return -1;
145                         case TREE_HAS_DUPS:
146                                 fprintf(stderr, "tree %s has duplicate entries for '%s'\n",
147                                         sha1_to_hex(item->object.sha1),
148                                         entry->name);
149                                 return -1;
150                         default:
151                                 break;
152                         }
153                 }
154
155                 last = entry;
156         }
157
158         if (has_full_path) {
159                 fprintf(stderr, "warning: git-fsck-cache: tree %s "
160                         "has full pathnames in it\n", 
161                         sha1_to_hex(item->object.sha1));
162         }
163
164         return 0;
165 }
166
167 static int fsck_commit(struct commit *commit)
168 {
169         free(commit->buffer);
170         commit->buffer = NULL;
171         if (!commit->tree)
172                 return -1;
173         if (!commit->parents && show_root)
174                 printf("root %s\n", sha1_to_hex(commit->object.sha1));
175         if (!commit->date)
176                 printf("bad commit date in %s\n", 
177                        sha1_to_hex(commit->object.sha1));
178         return 0;
179 }
180
181 static int fsck_tag(struct tag *tag)
182 {
183         struct object *tagged = tag->tagged;
184
185         if (!tagged) {
186                 printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
187                 return -1;
188         }
189         if (!show_tags)
190                 return 0;
191
192         printf("tagged %s %s", tagged->type, sha1_to_hex(tagged->sha1));
193         printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
194         return 0;
195 }
196
197 static int fsck_sha1(unsigned char *sha1)
198 {
199         struct object *obj = parse_object(sha1);
200         if (!obj)
201                 return -1;
202         if (obj->type == blob_type)
203                 return 0;
204         if (obj->type == tree_type)
205                 return fsck_tree((struct tree *) obj);
206         if (obj->type == commit_type)
207                 return fsck_commit((struct commit *) obj);
208         if (obj->type == tag_type)
209                 return fsck_tag((struct tag *) obj);
210         return -1;
211 }
212
/*
 * This is the sorting chunk size: make it reasonably
 * big so that we can sort well..
 */
#define MAX_SHA1_ENTRIES (1024)

/* One object file found on disk: its sha1 plus the inode number it lives in. */
struct sha1_entry {
	unsigned long ino;
	unsigned char sha1[20];
};

/* Batch of pending entries; `nr` counts the used slots of `entry`. */
static struct {
	unsigned long nr;
	struct sha1_entry *entry[MAX_SHA1_ENTRIES];
} sha1_list;

/*
 * qsort() comparator ordering sha1_list.entry[] by ascending inode.
 *
 * The array holds *pointers* to entries, and qsort() passes the
 * comparator pointers to the array elements, so `_a` and `_b` are
 * really `struct sha1_entry **`.  They must be dereferenced once
 * before `ino` is read; otherwise the pointer values themselves
 * would be compared instead of the inode numbers.
 *
 * Returns a negative, zero or positive value as qsort() expects.
 */
static int ino_compare(const void *_a, const void *_b)
{
	const struct sha1_entry *a = *(const struct sha1_entry *const *)_a;
	const struct sha1_entry *b = *(const struct sha1_entry *const *)_b;
	unsigned long ino1 = a->ino, ino2 = b->ino;

	return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
}
235
236 static void fsck_sha1_list(void)
237 {
238         int i, nr = sha1_list.nr;
239
240         qsort(sha1_list.entry, nr, sizeof(struct sha1_entry *), ino_compare);
241         for (i = 0; i < nr; i++) {
242                 struct sha1_entry *entry = sha1_list.entry[i];
243                 unsigned char *sha1 = entry->sha1;
244
245                 sha1_list.entry[i] = NULL;
246                 if (fsck_sha1(sha1) < 0)
247                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
248                 free(entry);
249         }
250         sha1_list.nr = 0;
251 }
252
253 static void add_sha1_list(unsigned char *sha1, unsigned long ino)
254 {
255         struct sha1_entry *entry = xmalloc(sizeof(*entry));
256         int nr;
257
258         entry->ino = ino;
259         memcpy(entry->sha1, sha1, 20);
260         nr = sha1_list.nr;
261         if (nr == MAX_SHA1_ENTRIES) {
262                 fsck_sha1_list();
263                 nr = 0;
264         }
265         sha1_list.entry[nr] = entry;
266         sha1_list.nr = ++nr;
267 }
268
/*
 * Scan one fan-out directory of the object store.
 *
 * `i` is the directory's number; it is formatted as the first two
 * hex digits of each object name.  `path` is the directory itself.
 * Every 38-character file name that, prefixed with those two digits,
 * parses as a hex sha1 is queued with add_sha1_list().  "." and ".."
 * are skipped; anything else is reported on stderr as a bad sha1
 * file.
 *
 * Returns 0, or the result of error() when the directory cannot be
 * opened.
 */
static int fsck_dir(int i, char *path)
{
	DIR *dh = opendir(path);
	struct dirent *ent;

	if (dh == NULL)
		return error("missing sha1 directory '%s'", path);

	for (ent = readdir(dh); ent != NULL; ent = readdir(dh)) {
		const char *fname = ent->d_name;
		int flen = strlen(fname);
		char hex[100];
		unsigned char sha1[20];

		/* Skip "." and "..". */
		if (flen == 1 && fname[0] == '.')
			continue;
		if (flen == 2 && fname[0] == '.' && fname[1] == '.')
			continue;

		if (flen == 38) {
			/* Rebuild the 40-digit name: 2 from the dir, 38 from the file. */
			sprintf(hex, "%02x", i);
			memcpy(hex + 2, fname, flen + 1);
			if (get_sha1_hex(hex, sha1) >= 0) {
				add_sha1_list(sha1, ent->d_ino);
				continue;
			}
		}

		fprintf(stderr, "bad sha1 file: %s/%s\n", path, fname);
	}
	closedir(dh);
	return 0;
}
304
305 static int read_sha1_reference(const char *path)
306 {
307         char hexname[60];
308         unsigned char sha1[20];
309         int fd = open(path, O_RDONLY), len;
310         struct object *obj;
311
312         if (fd < 0)
313                 return -1;
314
315         len = read(fd, hexname, sizeof(hexname));
316         close(fd);
317         if (len < 40)
318                 return -1;
319
320         if (get_sha1_hex(hexname, sha1) < 0)
321                 return -1;
322
323         obj = lookup_object(sha1);
324         if (!obj) {
325                 if (!standalone && has_sha1_file(sha1))
326                         return 0; /* it is in pack */
327                 return error("%s: invalid sha1 pointer %.40s", path, hexname);
328         }
329
330         obj->used = 1;
331         mark_reachable(obj, REACHABLE);
332         return 0;
333 }
334
/*
 * Recursively collect references below "<base>/<name>".
 *
 * Directories are descended into (entries whose name starts with
 * '.' are skipped); every regular file is handed to
 * read_sha1_reference().
 *
 * Returns the number of files for which read_sha1_reference()
 * succeeded.  A path that cannot be stat()ed, or that is neither a
 * directory nor a regular file, counts as 0.
 *
 * The path buffer built here is released on every exit; it used to
 * be leaked once per call, i.e. once per file and directory visited.
 */
static int find_file_objects(const char *base, const char *name)
{
	int baselen = strlen(base);
	int namelen = strlen(name);
	char *path = xmalloc(baselen + namelen + 2);	/* '/' and NUL */
	struct stat st;
	int count = 0;

	memcpy(path, base, baselen);
	path[baselen] = '/';
	memcpy(path + baselen + 1, name, namelen + 1);

	if (stat(path, &st) < 0)
		goto out;

	if (S_ISDIR(st.st_mode)) {
		/*
		 * Recurse into directories
		 */
		DIR *dir = opendir(path);

		if (dir) {
			struct dirent *de;

			while ((de = readdir(dir)) != NULL) {
				if (de->d_name[0] == '.')
					continue;
				count += find_file_objects(path, de->d_name);
			}
			closedir(dir);
		}
	} else if (S_ISREG(st.st_mode)) {
		count = (read_sha1_reference(path) == 0);
	}

out:
	/* Children copy `path` into their own buffer, so it is safe to free. */
	free(path);
	return count;
}
369
370 static void get_default_heads(void)
371 {
372         char *git_dir = gitenv(GIT_DIR_ENVIRONMENT) ? : DEFAULT_GIT_DIR_ENVIRONMENT;
373         int count = find_file_objects(git_dir, "refs");
374         if (!count)
375                 die("No default references");
376 }
377
/*
 * Check all loose objects below the object directory `path`: scan
 * each of the 256 fan-out subdirectories "00" .. "ff" with
 * fsck_dir(), then flush whatever is still queued.
 *
 * The subdirectory name is built with a bounded snprintf(); if
 * `path` is too long for the buffer an error is reported and the
 * scan of this object directory is abandoned instead of writing
 * past the end of the buffer.
 */
static void fsck_object_dir(const char *path)
{
	static char dir[4096];
	int i;

	for (i = 0; i < 256; i++) {
		int len = snprintf(dir, sizeof(dir), "%s/%02x", path, i);

		if (len < 0 || (size_t)len >= sizeof(dir)) {
			/* Every other subdirectory name would be just as long. */
			error("object directory path too long: %s", path);
			break;
		}
		fsck_dir(i, dir);
	}
	fsck_sha1_list();
}
388
389 int main(int argc, char **argv)
390 {
391         int i, heads;
392
393         for (i = 1; i < argc; i++) {
394                 const char *arg = argv[i];
395
396                 if (!strcmp(arg, "--unreachable")) {
397                         show_unreachable = 1;
398                         continue;
399                 }
400                 if (!strcmp(arg, "--tags")) {
401                         show_tags = 1;
402                         continue;
403                 }
404                 if (!strcmp(arg, "--root")) {
405                         show_root = 1;
406                         continue;
407                 }
408                 if (!strcmp(arg, "--cache")) {
409                         keep_cache_objects = 1;
410                         continue;
411                 }
412                 if (!strcmp(arg, "--standalone")) {
413                         standalone = 1;
414                         continue;
415                 }
416                 if (!strcmp(arg, "--full")) {
417                         check_full = 1;
418                         continue;
419                 }
420                 if (*arg == '-')
421                         usage("git-fsck-cache [--tags] [[--unreachable] [--cache] [--standalone | --full] <head-sha1>*]");
422         }
423
424         if (standalone && check_full)
425                 die("Only one of --standalone or --full can be used.");
426         if (standalone)
427                 unsetenv("GIT_ALTERNATE_OBJECT_DIRECTORIES");
428
429         fsck_object_dir(get_object_directory());
430         if (check_full) {
431                 int j;
432                 struct packed_git *p;
433                 prepare_alt_odb();
434                 for (j = 0; alt_odb[j].base; j++) {
435                         alt_odb[j].name[-1] = 0; /* was slash */
436                         fsck_object_dir(alt_odb[j].base);
437                         alt_odb[j].name[-1] = '/';
438                 }
439                 prepare_packed_git();
440                 for (p = packed_git; p; p = p->next) {
441                         int num = num_packed_objects(p);
442                         for (i = 0; i < num; i++) {
443                                 unsigned char sha1[20];
444                                 nth_packed_object_sha1(p, i, sha1);
445                                 if (fsck_sha1(sha1) < 0)
446                                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
447
448                         }
449                 }
450         }
451
452         heads = 0;
453         for (i = 1; i < argc; i++) {
454                 const char *arg = argv[i]; 
455
456                 if (*arg == '-')
457                         continue;
458
459                 if (!get_sha1(arg, head_sha1)) {
460                         struct object *obj = lookup_object(head_sha1);
461
462                         /* Error is printed by lookup_object(). */
463                         if (!obj)
464                                 continue;
465
466                         obj->used = 1;
467                         mark_reachable(obj, REACHABLE);
468                         heads++;
469                         continue;
470                 }
471                 error("expected sha1, got %s", arg);
472         }
473
474         /*
475          * If we've not been given any explicit head information, do the
476          * default ones from .git/refs. We also consider the index file
477          * in this case (ie this implies --cache).
478          */
479         if (!heads) {
480                 get_default_heads();
481                 keep_cache_objects = 1;
482         }
483
484         if (keep_cache_objects) {
485                 int i;
486                 read_cache();
487                 for (i = 0; i < active_nr; i++) {
488                         struct blob *blob = lookup_blob(active_cache[i]->sha1);
489                         struct object *obj;
490                         if (!blob)
491                                 continue;
492                         obj = &blob->object;
493                         obj->used = 1;
494                         mark_reachable(obj, REACHABLE);
495                 }
496         }
497
498         check_connectivity();
499         return 0;
500 }