7c55fe56a439d4ac02c288cb99c3d7b877751322
[git.git] / fsck-cache.c
1 #include <sys/types.h>
2 #include <dirent.h>
3
4 #include "cache.h"
5 #include "commit.h"
6 #include "tree.h"
7 #include "blob.h"
8 #include "tag.h"
9 #include "delta.h"
10
11 #define REACHABLE 0x0001
12
13 static int show_root = 0;
14 static int show_tags = 0;
15 static int show_unreachable = 0;
16 static int show_max_delta_depth = 0;
17 static int keep_cache_objects = 0; 
18 static unsigned char head_sha1[20];
19
20 static void expand_deltas(void)
21 {
22         int i, max_depth = 0;
23
24         /*
25          * To be as efficient as possible we look for delta heads and
26          * recursively process them going backward, and parsing
27          * resulting objects along the way.  This allows for processing
28          * each delta objects only once regardless of the delta depth.
29          */
30         for (i = 0; i < nr_objs; i++) {
31                 struct object *obj = objs[i];
32                 if (obj->parsed && !obj->delta && obj->attached_deltas) {
33                         int depth = 0;
34                         char type[10];
35                         unsigned long size;
36                         void *buf = read_sha1_file(obj->sha1, type, &size);
37                         if (!buf)
38                                 continue;
39                         depth = process_deltas(buf, size, obj->type,
40                                                obj->attached_deltas);
41                         if (max_depth < depth)
42                                 max_depth = depth;
43                 }
44         }
45         if (show_max_delta_depth)
46                 printf("maximum delta depth = %d\n", max_depth);
47 }
48                                                                                                                         
49 static void check_connectivity(void)
50 {
51         int i;
52
53         /* Look up all the requirements, warn about missing objects.. */
54         for (i = 0; i < nr_objs; i++) {
55                 struct object *obj = objs[i];
56                 struct object_list *refs;
57
58                 if (!obj->parsed) {
59                         if (obj->delta)
60                                 printf("unresolved delta %s\n",
61                                        sha1_to_hex(obj->sha1));
62                         else
63                                 printf("missing %s %s\n",
64                                        obj->type, sha1_to_hex(obj->sha1));
65                         continue;
66                 }
67
68                 for (refs = obj->refs; refs; refs = refs->next) {
69                         if (refs->item->parsed)
70                                 continue;
71                         printf("broken link from %7s %s\n",
72                                obj->type, sha1_to_hex(obj->sha1));
73                         printf("              to %7s %s\n",
74                                refs->item->type, sha1_to_hex(refs->item->sha1));
75                 }
76
77                 if (show_unreachable && !(obj->flags & REACHABLE)) {
78                         if (obj->attached_deltas)
79                                 printf("foreign delta reference %s\n", 
80                                        sha1_to_hex(obj->sha1));
81                         else
82                                 printf("unreachable %s %s\n",
83                                        obj->type, sha1_to_hex(obj->sha1));
84                         continue;
85                 }
86
87                 if (!obj->used) {
88                         printf("dangling %s %s\n", obj->type, 
89                                sha1_to_hex(obj->sha1));
90                 }
91         }
92 }
93
94 /*
95  * The entries in a tree are ordered in the _path_ order,
96  * which means that a directory entry is ordered by adding
97  * a slash to the end of it.
98  *
99  * So a directory called "a" is ordered _after_ a file
100  * called "a.c", because "a/" sorts after "a.c".
101  */
102 #define TREE_UNORDERED (-1)
103 #define TREE_HAS_DUPS  (-2)
104
105 static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
106 {
107         int len1 = strlen(a->name);
108         int len2 = strlen(b->name);
109         int len = len1 < len2 ? len1 : len2;
110         unsigned char c1, c2;
111         int cmp;
112
113         cmp = memcmp(a->name, b->name, len);
114         if (cmp < 0)
115                 return 0;
116         if (cmp > 0)
117                 return TREE_UNORDERED;
118
119         /*
120          * Ok, the first <len> characters are the same.
121          * Now we need to order the next one, but turn
122          * a '\0' into a '/' for a directory entry.
123          */
124         c1 = a->name[len];
125         c2 = b->name[len];
126         if (!c1 && !c2)
127                 /*
128                  * git-write-tree used to write out a nonsense tree that has
129                  * entries with the same name, one blob and one tree.  Make
130                  * sure we do not have duplicate entries.
131                  */
132                 return TREE_HAS_DUPS;
133         if (!c1 && a->directory)
134                 c1 = '/';
135         if (!c2 && b->directory)
136                 c2 = '/';
137         return c1 < c2 ? 0 : TREE_UNORDERED;
138 }
139
140 static int fsck_tree(struct tree *item)
141 {
142         int has_full_path = 0;
143         struct tree_entry_list *entry, *last;
144
145         last = NULL;
146         for (entry = item->entries; entry; entry = entry->next) {
147                 if (strchr(entry->name, '/'))
148                         has_full_path = 1;
149
150                 switch (entry->mode) {
151                 /*
152                  * Standard modes.. 
153                  */
154                 case S_IFREG | 0755:
155                 case S_IFREG | 0644:
156                 case S_IFLNK:
157                 case S_IFDIR:
158                         break;
159                 /*
160                  * This is nonstandard, but we had a few of these
161                  * early on when we honored the full set of mode
162                  * bits..
163                  */
164                 case S_IFREG | 0664:
165                         break;
166                 default:
167                         printf("tree %s has entry %o %s\n",
168                                 sha1_to_hex(item->object.sha1),
169                                 entry->mode, entry->name);
170                 }
171
172                 if (last) {
173                         switch (verify_ordered(last, entry)) {
174                         case TREE_UNORDERED:
175                                 fprintf(stderr, "tree %s not ordered\n",
176                                         sha1_to_hex(item->object.sha1));
177                                 return -1;
178                         case TREE_HAS_DUPS:
179                                 fprintf(stderr, "tree %s has duplicate entries for '%s'\n",
180                                         sha1_to_hex(item->object.sha1),
181                                         entry->name);
182                                 return -1;
183                         default:
184                                 break;
185                         }
186                 }
187
188                 last = entry;
189         }
190
191         if (has_full_path) {
192                 fprintf(stderr, "warning: git-fsck-cache: tree %s "
193                         "has full pathnames in it\n", 
194                         sha1_to_hex(item->object.sha1));
195         }
196
197         return 0;
198 }
199
200 static int fsck_commit(struct commit *commit)
201 {
202         free(commit->buffer);
203         commit->buffer = NULL;
204         if (!commit->tree)
205                 return -1;
206         if (!commit->parents && show_root)
207                 printf("root %s\n", sha1_to_hex(commit->object.sha1));
208         if (!commit->date)
209                 printf("bad commit date in %s\n", 
210                        sha1_to_hex(commit->object.sha1));
211         return 0;
212 }
213
214 static int fsck_tag(struct tag *tag)
215 {
216         struct object *tagged = tag->tagged;
217
218         if (!tagged) {
219                 printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
220                 return -1;
221         }
222         if (!show_tags)
223                 return 0;
224
225         printf("tagged %s %s", tagged->type, sha1_to_hex(tagged->sha1));
226         printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
227         return 0;
228 }
229
230 static int fsck_sha1(unsigned char *sha1)
231 {
232         struct object *obj = parse_object(sha1);
233         if (!obj)
234                 return -1;
235         if (obj->type == blob_type)
236                 return 0;
237         if (obj->type == tree_type)
238                 return fsck_tree((struct tree *) obj);
239         if (obj->type == commit_type)
240                 return fsck_commit((struct commit *) obj);
241         if (obj->type == tag_type)
242                 return fsck_tag((struct tag *) obj);
243         if (!obj->type && obj->delta)
244                 return 0;
245         return -1;
246 }
247
248 /*
249  * This is the sorting chunk size: make it reasonably
250  * big so that we can sort well..
251  */
252 #define MAX_SHA1_ENTRIES (1024)
253
254 struct sha1_entry {
255         unsigned long ino;
256         unsigned char sha1[20];
257 };
258
259 static struct {
260         unsigned long nr;
261         struct sha1_entry *entry[MAX_SHA1_ENTRIES];
262 } sha1_list;
263
264 static int ino_compare(const void *_a, const void *_b)
265 {
266         const struct sha1_entry *a = _a, *b = _b;
267         unsigned long ino1 = a->ino, ino2 = b->ino;
268         return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
269 }
270
271 static void fsck_sha1_list(void)
272 {
273         int i, nr = sha1_list.nr;
274
275         qsort(sha1_list.entry, nr, sizeof(struct sha1_entry *), ino_compare);
276         for (i = 0; i < nr; i++) {
277                 struct sha1_entry *entry = sha1_list.entry[i];
278                 unsigned char *sha1 = entry->sha1;
279
280                 sha1_list.entry[i] = NULL;
281                 if (fsck_sha1(sha1) < 0)
282                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
283                 free(entry);
284         }
285         sha1_list.nr = 0;
286 }
287
288 static void add_sha1_list(unsigned char *sha1, unsigned long ino)
289 {
290         struct sha1_entry *entry = xmalloc(sizeof(*entry));
291         int nr;
292
293         entry->ino = ino;
294         memcpy(entry->sha1, sha1, 20);
295         nr = sha1_list.nr;
296         if (nr == MAX_SHA1_ENTRIES) {
297                 fsck_sha1_list();
298                 nr = 0;
299         }
300         sha1_list.entry[nr] = entry;
301         sha1_list.nr = ++nr;
302 }
303
304 static int fsck_dir(int i, char *path)
305 {
306         DIR *dir = opendir(path);
307         struct dirent *de;
308
309         if (!dir) {
310                 return error("missing sha1 directory '%s'", path);
311         }
312
313         while ((de = readdir(dir)) != NULL) {
314                 char name[100];
315                 unsigned char sha1[20];
316                 int len = strlen(de->d_name);
317
318                 switch (len) {
319                 case 2:
320                         if (de->d_name[1] != '.')
321                                 break;
322                 case 1:
323                         if (de->d_name[0] != '.')
324                                 break;
325                         continue;
326                 case 38:
327                         sprintf(name, "%02x", i);
328                         memcpy(name+2, de->d_name, len+1);
329                         if (get_sha1_hex(name, sha1) < 0)
330                                 break;
331                         add_sha1_list(sha1, de->d_ino);
332                         continue;
333                 }
334                 fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
335         }
336         closedir(dir);
337         return 0;
338 }
339
340 static int read_sha1_reference(const char *path)
341 {
342         char hexname[60];
343         unsigned char sha1[20];
344         int fd = open(path, O_RDONLY), len;
345         struct object *obj;
346
347         if (fd < 0)
348                 return -1;
349
350         len = read(fd, hexname, sizeof(hexname));
351         close(fd);
352         if (len < 40)
353                 return -1;
354
355         if (get_sha1_hex(hexname, sha1) < 0)
356                 return -1;
357
358         obj = lookup_object(sha1);
359         if (!obj)
360                 return error("%s: invalid sha1 pointer %.40s", path, hexname);
361
362         obj->used = 1;
363         mark_reachable(obj, REACHABLE);
364         return 0;
365 }
366
367 static int find_file_objects(const char *base, const char *name)
368 {
369         int baselen = strlen(base);
370         int namelen = strlen(name);
371         char *path = xmalloc(baselen + namelen + 2);
372         struct stat st;
373
374         memcpy(path, base, baselen);
375         path[baselen] = '/';
376         memcpy(path + baselen + 1, name, namelen+1);
377         if (stat(path, &st) < 0)
378                 return 0;
379
380         /*
381          * Recurse into directories
382          */
383         if (S_ISDIR(st.st_mode)) {
384                 int count = 0;
385                 DIR *dir = opendir(path);
386                 if (dir) {
387                         struct dirent *de;
388                         while ((de = readdir(dir)) != NULL) {
389                                 if (de->d_name[0] == '.')
390                                         continue;
391                                 count += find_file_objects(path, de->d_name);
392                         }
393                         closedir(dir);
394                 }
395                 return count;
396         }
397         if (S_ISREG(st.st_mode))
398                 return read_sha1_reference(path) == 0;
399         return 0;
400 }
401
402 static void get_default_heads(void)
403 {
404         char *git_dir = gitenv(GIT_DIR_ENVIRONMENT) ? : DEFAULT_GIT_DIR_ENVIRONMENT;
405         int count = find_file_objects(git_dir, "refs");
406         if (!count)
407                 die("No default references");
408 }
409
410 int main(int argc, char **argv)
411 {
412         int i, heads;
413         char *sha1_dir;
414
415         for (i = 1; i < argc; i++) {
416                 const char *arg = argv[i];
417
418                 if (!strcmp(arg, "--unreachable")) {
419                         show_unreachable = 1;
420                         continue;
421                 }
422                 if (!strcmp(arg, "--tags")) {
423                         show_tags = 1;
424                         continue;
425                 }
426                 if (!strcmp(arg, "--root")) {
427                         show_root = 1;
428                         continue;
429                 }
430                 if (!strcmp(arg, "--delta-depth")) {
431                         show_max_delta_depth = 1;
432                         continue;
433                 }
434                 if (!strcmp(arg, "--cache")) {
435                         keep_cache_objects = 1;
436                         continue;
437                 }
438                 if (*arg == '-')
439                         usage("git-fsck-cache [--tags] [[--unreachable] [--cache] <head-sha1>*]");
440         }
441
442         sha1_dir = get_object_directory();
443         for (i = 0; i < 256; i++) {
444                 static char dir[4096];
445                 sprintf(dir, "%s/%02x", sha1_dir, i);
446                 fsck_dir(i, dir);
447         }
448         fsck_sha1_list();
449
450         expand_deltas();
451
452         heads = 0;
453         for (i = 1; i < argc; i++) {
454                 const char *arg = argv[i]; 
455
456                 if (*arg == '-')
457                         continue;
458
459                 if (!get_sha1(arg, head_sha1)) {
460                         struct object *obj = lookup_object(head_sha1);
461
462                         /* Error is printed by lookup_object(). */
463                         if (!obj)
464                                 continue;
465
466                         obj->used = 1;
467                         mark_reachable(obj, REACHABLE);
468                         heads++;
469                         continue;
470                 }
471                 error("expected sha1, got %s", arg);
472         }
473
474         /*
475          * If we've not been given any explicit head information, do the
476          * default ones from .git/refs. We also consider the index file
477          * in this case (ie this implies --cache).
478          */
479         if (!heads) {
480                 get_default_heads();
481                 keep_cache_objects = 1;
482         }
483
484         if (keep_cache_objects) {
485                 int i;
486                 read_cache();
487                 for (i = 0; i < active_nr; i++) {
488                         struct blob *blob = lookup_blob(active_cache[i]->sha1);
489                         struct object *obj;
490                         if (!blob)
491                                 continue;
492                         obj = &blob->object;
493                         obj->used = 1;
494                         mark_reachable(obj, REACHABLE);
495                 }
496         }
497
498         check_connectivity();
499         return 0;
500 }