[PATCH] cvs2git and file permissions
[git.git] / fsck-cache.c
1 #include <sys/types.h>
2 #include <dirent.h>
3
4 #include "cache.h"
5 #include "commit.h"
6 #include "tree.h"
7 #include "blob.h"
8 #include "tag.h"
9 #include "delta.h"
10
11 #define REACHABLE 0x0001
12
13 static int show_root = 0;
14 static int show_tags = 0;
15 static int show_unreachable = 0;
16 static int show_max_delta_depth = 0;
17 static int keep_cache_objects = 0; 
18 static unsigned char head_sha1[20];
19
20 static void expand_deltas(void)
21 {
22         int i, max_depth = 0;
23
24         /*
25          * To be as efficient as possible we look for delta heads and
26          * recursively process them going backward, and parsing
27          * resulting objects along the way.  This allows for processing
28          * each delta objects only once regardless of the delta depth.
29          */
30         for (i = 0; i < nr_objs; i++) {
31                 struct object *obj = objs[i];
32                 if (obj->parsed && !obj->delta && obj->attached_deltas) {
33                         int depth = 0;
34                         char type[10];
35                         unsigned long size;
36                         void *buf = read_sha1_file(obj->sha1, type, &size);
37                         if (!buf)
38                                 continue;
39                         depth = process_deltas(buf, size, obj->type,
40                                                obj->attached_deltas);
41                         if (max_depth < depth)
42                                 max_depth = depth;
43                 }
44         }
45         if (show_max_delta_depth)
46                 printf("maximum delta depth = %d\n", max_depth);
47 }
48                                                                                                                         
49 static void check_connectivity(void)
50 {
51         int i;
52
53         /* Look up all the requirements, warn about missing objects.. */
54         for (i = 0; i < nr_objs; i++) {
55                 struct object *obj = objs[i];
56                 struct object_list *refs;
57
58                 if (!obj->parsed) {
59                         if (obj->delta)
60                                 printf("unresolved delta %s\n",
61                                        sha1_to_hex(obj->sha1));
62                         else
63                                 printf("missing %s %s\n",
64                                        obj->type, sha1_to_hex(obj->sha1));
65                         continue;
66                 }
67
68                 for (refs = obj->refs; refs; refs = refs->next) {
69                         if (refs->item->parsed)
70                                 continue;
71                         printf("broken link from %7s %s\n",
72                                obj->type, sha1_to_hex(obj->sha1));
73                         printf("              to %7s %s\n",
74                                refs->item->type, sha1_to_hex(refs->item->sha1));
75                 }
76
77                 /* Don't bother with tag reachability. */
78                 if (obj->type == tag_type)
79                         continue;
80
81                 if (show_unreachable && !(obj->flags & REACHABLE)) {
82                         if (obj->attached_deltas)
83                                 printf("foreign delta reference %s\n", 
84                                        sha1_to_hex(obj->sha1));
85                         else
86                                 printf("unreachable %s %s\n",
87                                        obj->type, sha1_to_hex(obj->sha1));
88                         continue;
89                 }
90
91                 if (!obj->used) {
92                         printf("dangling %s %s\n", obj->type, 
93                                sha1_to_hex(obj->sha1));
94                 }
95         }
96 }
97
98 /*
99  * The entries in a tree are ordered in the _path_ order,
100  * which means that a directory entry is ordered by adding
101  * a slash to the end of it.
102  *
103  * So a directory called "a" is ordered _after_ a file
104  * called "a.c", because "a/" sorts after "a.c".
105  */
106 #define TREE_UNORDERED (-1)
107 #define TREE_HAS_DUPS  (-2)
108
109 static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
110 {
111         int len1 = strlen(a->name);
112         int len2 = strlen(b->name);
113         int len = len1 < len2 ? len1 : len2;
114         unsigned char c1, c2;
115         int cmp;
116
117         cmp = memcmp(a->name, b->name, len);
118         if (cmp < 0)
119                 return 0;
120         if (cmp > 0)
121                 return TREE_UNORDERED;
122
123         /*
124          * Ok, the first <len> characters are the same.
125          * Now we need to order the next one, but turn
126          * a '\0' into a '/' for a directory entry.
127          */
128         c1 = a->name[len];
129         c2 = b->name[len];
130         if (!c1 && !c2)
131                 /*
132                  * git-write-tree used to write out a nonsense tree that has
133                  * entries with the same name, one blob and one tree.  Make
134                  * sure we do not have duplicate entries.
135                  */
136                 return TREE_HAS_DUPS;
137         if (!c1 && a->directory)
138                 c1 = '/';
139         if (!c2 && b->directory)
140                 c2 = '/';
141         return c1 < c2 ? 0 : TREE_UNORDERED;
142 }
143
144 static int fsck_tree(struct tree *item)
145 {
146         int has_full_path = 0;
147         struct tree_entry_list *entry, *last;
148
149         last = NULL;
150         for (entry = item->entries; entry; entry = entry->next) {
151                 if (strchr(entry->name, '/'))
152                         has_full_path = 1;
153
154                 switch (entry->mode) {
155                 /*
156                  * Standard modes.. 
157                  */
158                 case S_IFREG | 0755:
159                 case S_IFREG | 0644:
160                 case S_IFLNK:
161                 case S_IFDIR:
162                         break;
163                 /*
164                  * This is nonstandard, but we had a few of these
165                  * early on when we honored the full set of mode
166                  * bits..
167                  */
168                 case S_IFREG | 0664:
169                         break;
170                 default:
171                         printf("tree %s has entry %o %s\n",
172                                 sha1_to_hex(item->object.sha1),
173                                 entry->mode, entry->name);
174                 }
175
176                 if (last) {
177                         switch (verify_ordered(last, entry)) {
178                         case TREE_UNORDERED:
179                                 fprintf(stderr, "tree %s not ordered\n",
180                                         sha1_to_hex(item->object.sha1));
181                                 return -1;
182                         case TREE_HAS_DUPS:
183                                 fprintf(stderr, "tree %s has duplicate entries for '%s'\n",
184                                         sha1_to_hex(item->object.sha1),
185                                         entry->name);
186                                 return -1;
187                         default:
188                                 break;
189                         }
190                 }
191
192                 last = entry;
193         }
194
195         if (has_full_path) {
196                 fprintf(stderr, "warning: git-fsck-cache: tree %s "
197                         "has full pathnames in it\n", 
198                         sha1_to_hex(item->object.sha1));
199         }
200
201         return 0;
202 }
203
204 static int fsck_commit(struct commit *commit)
205 {
206         free(commit->buffer);
207         commit->buffer = NULL;
208         if (!commit->tree)
209                 return -1;
210         if (!commit->parents && show_root)
211                 printf("root %s\n", sha1_to_hex(commit->object.sha1));
212         if (!commit->date)
213                 printf("bad commit date in %s\n", 
214                        sha1_to_hex(commit->object.sha1));
215         return 0;
216 }
217
218 static int fsck_tag(struct tag *tag)
219 {
220         struct object *tagged = tag->tagged;
221
222         if (!tagged) {
223                 printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
224                 return -1;
225         }
226         if (!show_tags)
227                 return 0;
228
229         printf("tagged %s %s", tagged->type, sha1_to_hex(tagged->sha1));
230         printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
231         return 0;
232 }
233
234 static int fsck_sha1(unsigned char *sha1)
235 {
236         struct object *obj = parse_object(sha1);
237         if (!obj)
238                 return -1;
239         if (obj->type == blob_type)
240                 return 0;
241         if (obj->type == tree_type)
242                 return fsck_tree((struct tree *) obj);
243         if (obj->type == commit_type)
244                 return fsck_commit((struct commit *) obj);
245         if (obj->type == tag_type)
246                 return fsck_tag((struct tag *) obj);
247         if (!obj->type && obj->delta)
248                 return 0;
249         return -1;
250 }
251
252 /*
253  * This is the sorting chunk size: make it reasonably
254  * big so that we can sort well..
255  */
256 #define MAX_SHA1_ENTRIES (1024)
257
258 struct sha1_entry {
259         unsigned long ino;
260         unsigned char sha1[20];
261 };
262
263 static struct {
264         unsigned long nr;
265         struct sha1_entry *entry[MAX_SHA1_ENTRIES];
266 } sha1_list;
267
268 static int ino_compare(const void *_a, const void *_b)
269 {
270         const struct sha1_entry *a = _a, *b = _b;
271         unsigned long ino1 = a->ino, ino2 = b->ino;
272         return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
273 }
274
275 static void fsck_sha1_list(void)
276 {
277         int i, nr = sha1_list.nr;
278
279         qsort(sha1_list.entry, nr, sizeof(struct sha1_entry *), ino_compare);
280         for (i = 0; i < nr; i++) {
281                 struct sha1_entry *entry = sha1_list.entry[i];
282                 unsigned char *sha1 = entry->sha1;
283
284                 sha1_list.entry[i] = NULL;
285                 if (fsck_sha1(sha1) < 0)
286                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
287                 free(entry);
288         }
289         sha1_list.nr = 0;
290 }
291
292 static void add_sha1_list(unsigned char *sha1, unsigned long ino)
293 {
294         struct sha1_entry *entry = xmalloc(sizeof(*entry));
295         int nr;
296
297         entry->ino = ino;
298         memcpy(entry->sha1, sha1, 20);
299         nr = sha1_list.nr;
300         if (nr == MAX_SHA1_ENTRIES) {
301                 fsck_sha1_list();
302                 nr = 0;
303         }
304         sha1_list.entry[nr] = entry;
305         sha1_list.nr = ++nr;
306 }
307
308 static int fsck_dir(int i, char *path)
309 {
310         DIR *dir = opendir(path);
311         struct dirent *de;
312
313         if (!dir) {
314                 return error("missing sha1 directory '%s'", path);
315         }
316
317         while ((de = readdir(dir)) != NULL) {
318                 char name[100];
319                 unsigned char sha1[20];
320                 int len = strlen(de->d_name);
321
322                 switch (len) {
323                 case 2:
324                         if (de->d_name[1] != '.')
325                                 break;
326                 case 1:
327                         if (de->d_name[0] != '.')
328                                 break;
329                         continue;
330                 case 38:
331                         sprintf(name, "%02x", i);
332                         memcpy(name+2, de->d_name, len+1);
333                         if (get_sha1_hex(name, sha1) < 0)
334                                 break;
335                         add_sha1_list(sha1, de->d_ino);
336                         continue;
337                 }
338                 fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
339         }
340         closedir(dir);
341         return 0;
342 }
343
344 static int read_sha1_reference(const char *path)
345 {
346         char hexname[60];
347         unsigned char sha1[20];
348         int fd = open(path, O_RDONLY), len;
349         struct object *obj;
350
351         if (fd < 0)
352                 return -1;
353
354         len = read(fd, hexname, sizeof(hexname));
355         close(fd);
356         if (len < 40)
357                 return -1;
358
359         if (get_sha1_hex(hexname, sha1) < 0)
360                 return -1;
361
362         obj = lookup_object(sha1);
363         if (!obj)
364                 return error("%s: invalid sha1 pointer %.40s", path, hexname);
365
366         obj->used = 1;
367         mark_reachable(obj, REACHABLE);
368         return 0;
369 }
370
371 static int find_file_objects(const char *base, const char *name)
372 {
373         int baselen = strlen(base);
374         int namelen = strlen(name);
375         char *path = xmalloc(baselen + namelen + 2);
376         struct stat st;
377
378         memcpy(path, base, baselen);
379         path[baselen] = '/';
380         memcpy(path + baselen + 1, name, namelen+1);
381         if (stat(path, &st) < 0)
382                 return 0;
383
384         /*
385          * Recurse into directories
386          */
387         if (S_ISDIR(st.st_mode)) {
388                 int count = 0;
389                 DIR *dir = opendir(path);
390                 if (dir) {
391                         struct dirent *de;
392                         while ((de = readdir(dir)) != NULL) {
393                                 if (de->d_name[0] == '.')
394                                         continue;
395                                 count += find_file_objects(path, de->d_name);
396                         }
397                         closedir(dir);
398                 }
399                 return count;
400         }
401         if (S_ISREG(st.st_mode))
402                 return read_sha1_reference(path) == 0;
403         return 0;
404 }
405
406 static void get_default_heads(void)
407 {
408         char *git_dir = gitenv(GIT_DIR_ENVIRONMENT) ? : DEFAULT_GIT_DIR_ENVIRONMENT;
409         int count = find_file_objects(git_dir, "refs");
410         if (!count)
411                 die("No default references");
412 }
413
414 int main(int argc, char **argv)
415 {
416         int i, heads;
417         char *sha1_dir;
418
419         for (i = 1; i < argc; i++) {
420                 const char *arg = argv[i];
421
422                 if (!strcmp(arg, "--unreachable")) {
423                         show_unreachable = 1;
424                         continue;
425                 }
426                 if (!strcmp(arg, "--tags")) {
427                         show_tags = 1;
428                         continue;
429                 }
430                 if (!strcmp(arg, "--root")) {
431                         show_root = 1;
432                         continue;
433                 }
434                 if (!strcmp(arg, "--delta-depth")) {
435                         show_max_delta_depth = 1;
436                         continue;
437                 }
438                 if (!strcmp(arg, "--cache")) {
439                         keep_cache_objects = 1;
440                         continue;
441                 }
442                 if (*arg == '-')
443                         usage("git-fsck-cache [--tags] [[--unreachable] [--cache] <head-sha1>*]");
444         }
445
446         sha1_dir = get_object_directory();
447         for (i = 0; i < 256; i++) {
448                 static char dir[4096];
449                 sprintf(dir, "%s/%02x", sha1_dir, i);
450                 fsck_dir(i, dir);
451         }
452         fsck_sha1_list();
453
454         expand_deltas();
455
456         heads = 0;
457         for (i = 1; i < argc; i++) {
458                 const char *arg = argv[i]; 
459
460                 if (*arg == '-')
461                         continue;
462
463                 if (!get_sha1(arg, head_sha1)) {
464                         struct object *obj = lookup_object(head_sha1);
465
466                         /* Error is printed by lookup_object(). */
467                         if (!obj)
468                                 continue;
469
470                         obj->used = 1;
471                         mark_reachable(obj, REACHABLE);
472                         heads++;
473                         continue;
474                 }
475                 error("expected sha1, got %s", arg);
476         }
477
478         /*
479          * If we've not been given any explicit head information, do the
480          * default ones from .git/refs. We also consider the index file
481          * in this case (ie this implies --cache).
482          */
483         if (!heads) {
484                 get_default_heads();
485                 keep_cache_objects = 1;
486         }
487
488         if (keep_cache_objects) {
489                 int i;
490                 read_cache();
491                 for (i = 0; i < active_nr; i++) {
492                         struct blob *blob = lookup_blob(active_cache[i]->sha1);
493                         struct object *obj;
494                         if (!blob)
495                                 continue;
496                         obj = &blob->object;
497                         obj->used = 1;
498                         mark_reachable(obj, REACHABLE);
499                 }
500         }
501
502         check_connectivity();
503         return 0;
504 }