49c606f87f410ebfecc491999b45d246954ddd55
[git.git] / fsck-cache.c
1 #include <sys/types.h>
2 #include <dirent.h>
3
4 #include "cache.h"
5 #include "commit.h"
6 #include "tree.h"
7 #include "blob.h"
8 #include "tag.h"
9
/* Object flag bit handed to mark_reachable() and tested in check_connectivity(). */
#define REACHABLE 0x0001

/*
 * Command line switches; all start out cleared (static storage is
 * zero-initialised) and are set while main() parses its arguments.
 */
static int show_root;		/* --root: fsck_commit() prints parentless commits */
static int show_tags;		/* --tags: fsck_tag() prints what each tag points at */
static int show_unreachable;	/* --unreachable: report objects lacking REACHABLE */
static int standalone;		/* --standalone: never fall back to has_sha1_file() */
static int keep_cache_objects;	/* --cache, or implied when no heads are given */

/* Scratch buffer main() fills via get_sha1() for each head argument. */
static unsigned char head_sha1[20];
18
19 static void check_connectivity(void)
20 {
21         int i;
22
23         /* Look up all the requirements, warn about missing objects.. */
24         for (i = 0; i < nr_objs; i++) {
25                 struct object *obj = objs[i];
26                 struct object_list *refs;
27
28                 if (!obj->parsed) {
29                         if (!standalone && has_sha1_file(obj->sha1))
30                                 ; /* it is in pack */
31                         else
32                                 printf("missing %s %s\n",
33                                        obj->type, sha1_to_hex(obj->sha1));
34                         continue;
35                 }
36
37                 for (refs = obj->refs; refs; refs = refs->next) {
38                         if (refs->item->parsed ||
39                             (!standalone && has_sha1_file(refs->item->sha1)))
40                                 continue;
41                         printf("broken link from %7s %s\n",
42                                obj->type, sha1_to_hex(obj->sha1));
43                         printf("              to %7s %s\n",
44                                refs->item->type, sha1_to_hex(refs->item->sha1));
45                 }
46
47                 if (show_unreachable && !(obj->flags & REACHABLE)) {
48                         printf("unreachable %s %s\n",
49                                obj->type, sha1_to_hex(obj->sha1));
50                         continue;
51                 }
52
53                 if (!obj->used) {
54                         printf("dangling %s %s\n", obj->type, 
55                                sha1_to_hex(obj->sha1));
56                 }
57         }
58 }
59
60 /*
61  * The entries in a tree are ordered in the _path_ order,
62  * which means that a directory entry is ordered by adding
63  * a slash to the end of it.
64  *
65  * So a directory called "a" is ordered _after_ a file
66  * called "a.c", because "a/" sorts after "a.c".
67  */
68 #define TREE_UNORDERED (-1)
69 #define TREE_HAS_DUPS  (-2)
70
71 static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
72 {
73         int len1 = strlen(a->name);
74         int len2 = strlen(b->name);
75         int len = len1 < len2 ? len1 : len2;
76         unsigned char c1, c2;
77         int cmp;
78
79         cmp = memcmp(a->name, b->name, len);
80         if (cmp < 0)
81                 return 0;
82         if (cmp > 0)
83                 return TREE_UNORDERED;
84
85         /*
86          * Ok, the first <len> characters are the same.
87          * Now we need to order the next one, but turn
88          * a '\0' into a '/' for a directory entry.
89          */
90         c1 = a->name[len];
91         c2 = b->name[len];
92         if (!c1 && !c2)
93                 /*
94                  * git-write-tree used to write out a nonsense tree that has
95                  * entries with the same name, one blob and one tree.  Make
96                  * sure we do not have duplicate entries.
97                  */
98                 return TREE_HAS_DUPS;
99         if (!c1 && a->directory)
100                 c1 = '/';
101         if (!c2 && b->directory)
102                 c2 = '/';
103         return c1 < c2 ? 0 : TREE_UNORDERED;
104 }
105
106 static int fsck_tree(struct tree *item)
107 {
108         int has_full_path = 0;
109         struct tree_entry_list *entry, *last;
110
111         last = NULL;
112         for (entry = item->entries; entry; entry = entry->next) {
113                 if (strchr(entry->name, '/'))
114                         has_full_path = 1;
115
116                 switch (entry->mode) {
117                 /*
118                  * Standard modes.. 
119                  */
120                 case S_IFREG | 0755:
121                 case S_IFREG | 0644:
122                 case S_IFLNK:
123                 case S_IFDIR:
124                         break;
125                 /*
126                  * This is nonstandard, but we had a few of these
127                  * early on when we honored the full set of mode
128                  * bits..
129                  */
130                 case S_IFREG | 0664:
131                         break;
132                 default:
133                         printf("tree %s has entry %o %s\n",
134                                 sha1_to_hex(item->object.sha1),
135                                 entry->mode, entry->name);
136                 }
137
138                 if (last) {
139                         switch (verify_ordered(last, entry)) {
140                         case TREE_UNORDERED:
141                                 fprintf(stderr, "tree %s not ordered\n",
142                                         sha1_to_hex(item->object.sha1));
143                                 return -1;
144                         case TREE_HAS_DUPS:
145                                 fprintf(stderr, "tree %s has duplicate entries for '%s'\n",
146                                         sha1_to_hex(item->object.sha1),
147                                         entry->name);
148                                 return -1;
149                         default:
150                                 break;
151                         }
152                 }
153
154                 last = entry;
155         }
156
157         if (has_full_path) {
158                 fprintf(stderr, "warning: git-fsck-cache: tree %s "
159                         "has full pathnames in it\n", 
160                         sha1_to_hex(item->object.sha1));
161         }
162
163         return 0;
164 }
165
166 static int fsck_commit(struct commit *commit)
167 {
168         free(commit->buffer);
169         commit->buffer = NULL;
170         if (!commit->tree)
171                 return -1;
172         if (!commit->parents && show_root)
173                 printf("root %s\n", sha1_to_hex(commit->object.sha1));
174         if (!commit->date)
175                 printf("bad commit date in %s\n", 
176                        sha1_to_hex(commit->object.sha1));
177         return 0;
178 }
179
180 static int fsck_tag(struct tag *tag)
181 {
182         struct object *tagged = tag->tagged;
183
184         if (!tagged) {
185                 printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
186                 return -1;
187         }
188         if (!show_tags)
189                 return 0;
190
191         printf("tagged %s %s", tagged->type, sha1_to_hex(tagged->sha1));
192         printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
193         return 0;
194 }
195
196 static int fsck_sha1(unsigned char *sha1)
197 {
198         struct object *obj = parse_object(sha1);
199         if (!obj)
200                 return -1;
201         if (obj->type == blob_type)
202                 return 0;
203         if (obj->type == tree_type)
204                 return fsck_tree((struct tree *) obj);
205         if (obj->type == commit_type)
206                 return fsck_commit((struct commit *) obj);
207         if (obj->type == tag_type)
208                 return fsck_tag((struct tag *) obj);
209         return -1;
210 }
211
/*
 * This is the sorting chunk size: make it reasonably
 * big so that we can sort well..
 */
#define MAX_SHA1_ENTRIES (1024)

/* One object file found on disk: its directory-entry inode number and sha1. */
struct sha1_entry {
	unsigned long ino;
	unsigned char sha1[20];
};

/* Pending entries, collected until a full chunk can be sorted and checked. */
static struct {
	unsigned long nr;
	struct sha1_entry *entry[MAX_SHA1_ENTRIES];
} sha1_list;

/*
 * qsort() comparator ordering sha1_list.entry[] by ascending inode number.
 *
 * The array being sorted holds *pointers* to entries, so qsort() passes
 * pointers to those pointers; each argument must be dereferenced once
 * before the entry can be inspected.  (Treating the arguments as entries
 * directly would compare the pointer values rather than the inodes.)
 *
 * Returns -1, 0 or 1 in the usual comparator fashion.
 */
static int ino_compare(const void *_a, const void *_b)
{
	const struct sha1_entry *a = *(const struct sha1_entry * const *)_a;
	const struct sha1_entry *b = *(const struct sha1_entry * const *)_b;
	unsigned long ino1 = a->ino, ino2 = b->ino;

	return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
}
234
235 static void fsck_sha1_list(void)
236 {
237         int i, nr = sha1_list.nr;
238
239         qsort(sha1_list.entry, nr, sizeof(struct sha1_entry *), ino_compare);
240         for (i = 0; i < nr; i++) {
241                 struct sha1_entry *entry = sha1_list.entry[i];
242                 unsigned char *sha1 = entry->sha1;
243
244                 sha1_list.entry[i] = NULL;
245                 if (fsck_sha1(sha1) < 0)
246                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
247                 free(entry);
248         }
249         sha1_list.nr = 0;
250 }
251
252 static void add_sha1_list(unsigned char *sha1, unsigned long ino)
253 {
254         struct sha1_entry *entry = xmalloc(sizeof(*entry));
255         int nr;
256
257         entry->ino = ino;
258         memcpy(entry->sha1, sha1, 20);
259         nr = sha1_list.nr;
260         if (nr == MAX_SHA1_ENTRIES) {
261                 fsck_sha1_list();
262                 nr = 0;
263         }
264         sha1_list.entry[nr] = entry;
265         sha1_list.nr = ++nr;
266 }
267
/*
 * Scan one fan-out directory of the object store.
 *
 * `i` is the value whose two-digit hex form prefixes every file name in
 * `path` to make up the full object name.  Each 38-character name that
 * forms a valid hex sha1 together with that prefix is queued through
 * add_sha1_list(); "." and ".." are skipped; anything else is reported
 * on stderr as a bad sha1 file.
 *
 * Returns the result of error() when the directory cannot be opened,
 * 0 otherwise.
 */
static int fsck_dir(int i, char *path)
{
	DIR *dir = opendir(path);
	struct dirent *de;

	if (!dir)
		return error("missing sha1 directory '%s'", path);

	while ((de = readdir(dir)) != NULL) {
		const char *fname = de->d_name;
		int len = strlen(fname);
		char name[100];
		unsigned char sha1[20];

		/* Skip the "." and ".." entries. */
		if (len == 1 && fname[0] == '.')
			continue;
		if (len == 2 && fname[0] == '.' && fname[1] == '.')
			continue;

		if (len == 38) {
			/* Rebuild the 40-digit name: 2 from the directory, 38 from the file. */
			sprintf(name, "%02x", i);
			memcpy(name + 2, fname, len + 1);
			if (get_sha1_hex(name, sha1) >= 0) {
				add_sha1_list(sha1, de->d_ino);
				continue;
			}
		}

		fprintf(stderr, "bad sha1 file: %s/%s\n", path, fname);
	}
	closedir(dir);
	return 0;
}
303
304 static int read_sha1_reference(const char *path)
305 {
306         char hexname[60];
307         unsigned char sha1[20];
308         int fd = open(path, O_RDONLY), len;
309         struct object *obj;
310
311         if (fd < 0)
312                 return -1;
313
314         len = read(fd, hexname, sizeof(hexname));
315         close(fd);
316         if (len < 40)
317                 return -1;
318
319         if (get_sha1_hex(hexname, sha1) < 0)
320                 return -1;
321
322         obj = lookup_object(sha1);
323         if (!obj) {
324                 if (!standalone && has_sha1_file(sha1))
325                         return 0; /* it is in pack */
326                 return error("%s: invalid sha1 pointer %.40s", path, hexname);
327         }
328
329         obj->used = 1;
330         mark_reachable(obj, REACHABLE);
331         return 0;
332 }
333
/*
 * Count the usable references found at "<base>/<name>".
 *
 * A directory is descended into recursively (entries whose name starts
 * with '.' are skipped); a regular file counts as one reference when
 * read_sha1_reference() accepts it.  Anything that cannot be stat()ed,
 * or is neither a directory nor a regular file, contributes 0.
 *
 * The joined path is heap-allocated and released before returning on
 * every path, so walking a large refs hierarchy does not leak one buffer
 * per visited entry.
 */
static int find_file_objects(const char *base, const char *name)
{
	int baselen = strlen(base);
	int namelen = strlen(name);
	/* +2: one byte for the '/' separator, one for the trailing NUL. */
	char *path = xmalloc(baselen + namelen + 2);
	struct stat st;
	int count = 0;

	memcpy(path, base, baselen);
	path[baselen] = '/';
	memcpy(path + baselen + 1, name, namelen + 1);

	if (stat(path, &st) < 0)
		goto out;

	if (S_ISDIR(st.st_mode)) {
		/*
		 * Recurse into directories
		 */
		DIR *dir = opendir(path);
		if (dir) {
			struct dirent *de;
			while ((de = readdir(dir)) != NULL) {
				if (de->d_name[0] == '.')
					continue;
				count += find_file_objects(path, de->d_name);
			}
			closedir(dir);
		}
	} else if (S_ISREG(st.st_mode)) {
		count = (read_sha1_reference(path) == 0);
	}

out:
	free(path);
	return count;
}
368
369 static void get_default_heads(void)
370 {
371         char *git_dir = gitenv(GIT_DIR_ENVIRONMENT) ? : DEFAULT_GIT_DIR_ENVIRONMENT;
372         int count = find_file_objects(git_dir, "refs");
373         if (!count)
374                 die("No default references");
375 }
376
377 int main(int argc, char **argv)
378 {
379         int i, heads;
380         char *sha1_dir;
381
382         for (i = 1; i < argc; i++) {
383                 const char *arg = argv[i];
384
385                 if (!strcmp(arg, "--unreachable")) {
386                         show_unreachable = 1;
387                         continue;
388                 }
389                 if (!strcmp(arg, "--tags")) {
390                         show_tags = 1;
391                         continue;
392                 }
393                 if (!strcmp(arg, "--root")) {
394                         show_root = 1;
395                         continue;
396                 }
397                 if (!strcmp(arg, "--cache")) {
398                         keep_cache_objects = 1;
399                         continue;
400                 }
401                 if (!strcmp(arg, "--standalone")) {
402                         standalone = 1;
403                         continue;
404                 }
405                 if (*arg == '-')
406                         usage("git-fsck-cache [--tags] [[--unreachable] [--cache] <head-sha1>*]");
407         }
408
409         sha1_dir = get_object_directory();
410         for (i = 0; i < 256; i++) {
411                 static char dir[4096];
412                 sprintf(dir, "%s/%02x", sha1_dir, i);
413                 fsck_dir(i, dir);
414         }
415         fsck_sha1_list();
416
417         heads = 0;
418         for (i = 1; i < argc; i++) {
419                 const char *arg = argv[i]; 
420
421                 if (*arg == '-')
422                         continue;
423
424                 if (!get_sha1(arg, head_sha1)) {
425                         struct object *obj = lookup_object(head_sha1);
426
427                         /* Error is printed by lookup_object(). */
428                         if (!obj)
429                                 continue;
430
431                         obj->used = 1;
432                         mark_reachable(obj, REACHABLE);
433                         heads++;
434                         continue;
435                 }
436                 error("expected sha1, got %s", arg);
437         }
438
439         /*
440          * If we've not been given any explicit head information, do the
441          * default ones from .git/refs. We also consider the index file
442          * in this case (ie this implies --cache).
443          */
444         if (!heads) {
445                 get_default_heads();
446                 keep_cache_objects = 1;
447         }
448
449         if (keep_cache_objects) {
450                 int i;
451                 read_cache();
452                 for (i = 0; i < active_nr; i++) {
453                         struct blob *blob = lookup_blob(active_cache[i]->sha1);
454                         struct object *obj;
455                         if (!blob)
456                                 continue;
457                         obj = &blob->object;
458                         obj->used = 1;
459                         mark_reachable(obj, REACHABLE);
460                 }
461         }
462
463         check_connectivity();
464         return 0;
465 }