[PATCH] Improved "git add"
[git.git] / fsck-objects.c
1 #include <sys/types.h>
2 #include <dirent.h>
3
4 #include "cache.h"
5 #include "commit.h"
6 #include "tree.h"
7 #include "blob.h"
8 #include "tag.h"
9 #include "refs.h"
10 #include "pack.h"
11
/* Flag bit handed to mark_reachable() and tested in check_connectivity(). */
#define REACHABLE 0x0001

/*
 * Command-line switches.  Objects with static storage duration start
 * out zeroed, so every option is off until main() turns it on.
 */
static int show_root;		/* --root: print "root <sha1>" for parentless commits */
static int show_tags;		/* --tags: print what every tag points at */
static int show_unreachable;	/* --unreachable: list objects lacking REACHABLE */
static int standalone;		/* --standalone: do not accept has_sha1_file() hits */
static int check_full;		/* --full: also check alternates and packs */
static int check_strict;	/* --strict: treat mode 0664 files as bad modes */
static int keep_cache_objects;	/* --cache: index entries count as heads */

/* Scratch buffer for the head sha1 currently being resolved in main(). */
static unsigned char head_sha1[20];
22
23 static void check_connectivity(void)
24 {
25         int i;
26
27         /* Look up all the requirements, warn about missing objects.. */
28         for (i = 0; i < nr_objs; i++) {
29                 struct object *obj = objs[i];
30                 struct object_list *refs;
31
32                 if (!obj->parsed) {
33                         if (!standalone && has_sha1_file(obj->sha1))
34                                 ; /* it is in pack */
35                         else
36                                 printf("missing %s %s\n",
37                                        obj->type, sha1_to_hex(obj->sha1));
38                         continue;
39                 }
40
41                 for (refs = obj->refs; refs; refs = refs->next) {
42                         if (refs->item->parsed ||
43                             (!standalone && has_sha1_file(refs->item->sha1)))
44                                 continue;
45                         printf("broken link from %7s %s\n",
46                                obj->type, sha1_to_hex(obj->sha1));
47                         printf("              to %7s %s\n",
48                                refs->item->type, sha1_to_hex(refs->item->sha1));
49                 }
50
51                 if (show_unreachable && !(obj->flags & REACHABLE)) {
52                         printf("unreachable %s %s\n",
53                                obj->type, sha1_to_hex(obj->sha1));
54                         continue;
55                 }
56
57                 if (!obj->used) {
58                         printf("dangling %s %s\n", obj->type, 
59                                sha1_to_hex(obj->sha1));
60                 }
61         }
62 }
63
64 /*
65  * The entries in a tree are ordered in the _path_ order,
66  * which means that a directory entry is ordered by adding
67  * a slash to the end of it.
68  *
69  * So a directory called "a" is ordered _after_ a file
70  * called "a.c", because "a/" sorts after "a.c".
71  */
72 #define TREE_UNORDERED (-1)
73 #define TREE_HAS_DUPS  (-2)
74
75 static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
76 {
77         int len1 = strlen(a->name);
78         int len2 = strlen(b->name);
79         int len = len1 < len2 ? len1 : len2;
80         unsigned char c1, c2;
81         int cmp;
82
83         cmp = memcmp(a->name, b->name, len);
84         if (cmp < 0)
85                 return 0;
86         if (cmp > 0)
87                 return TREE_UNORDERED;
88
89         /*
90          * Ok, the first <len> characters are the same.
91          * Now we need to order the next one, but turn
92          * a '\0' into a '/' for a directory entry.
93          */
94         c1 = a->name[len];
95         c2 = b->name[len];
96         if (!c1 && !c2)
97                 /*
98                  * git-write-tree used to write out a nonsense tree that has
99                  * entries with the same name, one blob and one tree.  Make
100                  * sure we do not have duplicate entries.
101                  */
102                 return TREE_HAS_DUPS;
103         if (!c1 && a->directory)
104                 c1 = '/';
105         if (!c2 && b->directory)
106                 c2 = '/';
107         return c1 < c2 ? 0 : TREE_UNORDERED;
108 }
109
110 static int fsck_tree(struct tree *item)
111 {
112         int retval;
113         int has_full_path = 0;
114         int has_zero_pad = 0;
115         int has_bad_modes = 0;
116         int has_dup_entries = 0;
117         int not_properly_sorted = 0;
118         struct tree_entry_list *entry, *last;
119
120         last = NULL;
121         for (entry = item->entries; entry; entry = entry->next) {
122                 if (strchr(entry->name, '/'))
123                         has_full_path = 1;
124                 has_zero_pad |= entry->zeropad;
125
126                 switch (entry->mode) {
127                 /*
128                  * Standard modes.. 
129                  */
130                 case S_IFREG | 0755:
131                 case S_IFREG | 0644:
132                 case S_IFLNK:
133                 case S_IFDIR:
134                         break;
135                 /*
136                  * This is nonstandard, but we had a few of these
137                  * early on when we honored the full set of mode
138                  * bits..
139                  */
140                 case S_IFREG | 0664:
141                         if (!check_strict)
142                                 break;
143                 default:
144                         has_bad_modes = 1;
145                 }
146
147                 if (last) {
148                         switch (verify_ordered(last, entry)) {
149                         case TREE_UNORDERED:
150                                 not_properly_sorted = 1;
151                                 break;
152                         case TREE_HAS_DUPS:
153                                 has_dup_entries = 1;
154                                 break;
155                         default:
156                                 break;
157                         }
158                 }
159
160                 last = entry;
161         }
162
163         retval = 0;
164         if (has_full_path) {
165                 fprintf(stderr, "warning: git-fsck-objects: tree %s "
166                         "has full pathnames in it\n", 
167                         sha1_to_hex(item->object.sha1));
168         }
169         if (has_zero_pad) {
170                 fprintf(stderr, "warning: git-fsck-objects: tree %s "
171                         "has zero-padded file modes in it\n",
172                         sha1_to_hex(item->object.sha1));
173         }
174         if (has_bad_modes) {
175                 fprintf(stderr, "warning: git-fsck-objects: tree %s "
176                         "has bad file modes in it\n",
177                         sha1_to_hex(item->object.sha1));
178         }
179         if (has_dup_entries) {
180                 fprintf(stderr, "error: git-fsck-objects: tree %s "
181                         "has duplicate file entries\n",
182                         sha1_to_hex(item->object.sha1));
183                 retval = -1;
184         }
185         if (not_properly_sorted) {
186                 fprintf(stderr, "error: git-fsck-objects: tree %s "
187                         "is not properly sorted\n",
188                         sha1_to_hex(item->object.sha1));
189                 retval = -1;
190         }
191         return retval;
192 }
193
194 static int fsck_commit(struct commit *commit)
195 {
196         char *buffer = commit->buffer;
197         unsigned char sha1[20];
198
199         if (memcmp(buffer, "tree ", 5))
200                 return -1;
201         if (get_sha1_hex(buffer+5, sha1) || buffer[45] != '\n')
202                 return -1;
203         buffer += 46;
204         while (!memcmp(buffer, "parent ", 7)) {
205                 if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
206                         return -1;
207                 buffer += 48;
208         }
209         if (memcmp(buffer, "author ", 7))
210                 return -1;
211         free(commit->buffer);
212         commit->buffer = NULL;
213         if (!commit->tree)
214                 return -1;
215         if (!commit->parents && show_root)
216                 printf("root %s\n", sha1_to_hex(commit->object.sha1));
217         if (!commit->date)
218                 printf("bad commit date in %s\n", 
219                        sha1_to_hex(commit->object.sha1));
220         return 0;
221 }
222
223 static int fsck_tag(struct tag *tag)
224 {
225         struct object *tagged = tag->tagged;
226
227         if (!tagged) {
228                 printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
229                 return -1;
230         }
231         if (!show_tags)
232                 return 0;
233
234         printf("tagged %s %s", tagged->type, sha1_to_hex(tagged->sha1));
235         printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
236         return 0;
237 }
238
239 static int fsck_sha1(unsigned char *sha1)
240 {
241         struct object *obj = parse_object(sha1);
242         if (!obj)
243                 return -1;
244         if (obj->type == blob_type)
245                 return 0;
246         if (obj->type == tree_type)
247                 return fsck_tree((struct tree *) obj);
248         if (obj->type == commit_type)
249                 return fsck_commit((struct commit *) obj);
250         if (obj->type == tag_type)
251                 return fsck_tag((struct tag *) obj);
252         return -1;
253 }
254
/*
 * This is the sorting chunk size: make it reasonably
 * big so that we can sort well..
 */
#define MAX_SHA1_ENTRIES (1024)

/* One loose object found on disk: its directory-entry inode and its name. */
struct sha1_entry {
	unsigned long ino;
	unsigned char sha1[20];
};

/* Pending objects, collected here until a chunk is sorted and checked. */
static struct {
	unsigned long nr;
	struct sha1_entry *entry[MAX_SHA1_ENTRIES];
} sha1_list;

/*
 * qsort() comparator ordering sha1_list.entry[] by inode number.
 *
 * The array being sorted holds *pointers* to struct sha1_entry, and
 * qsort() hands the comparator the address of each array element.  The
 * arguments are therefore "struct sha1_entry **" and must be
 * dereferenced once to reach the entry.  Treating them as entries would
 * compare the pointer values instead of the inode numbers.
 *
 * Returns -1, 0 or 1 as a's inode is below, equal to or above b's.
 */
static int ino_compare(const void *_a, const void *_b)
{
	const struct sha1_entry *a = *(const struct sha1_entry *const *)_a;
	const struct sha1_entry *b = *(const struct sha1_entry *const *)_b;
	unsigned long ino1 = a->ino, ino2 = b->ino;

	return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
}
277
278 static void fsck_sha1_list(void)
279 {
280         int i, nr = sha1_list.nr;
281
282         qsort(sha1_list.entry, nr, sizeof(struct sha1_entry *), ino_compare);
283         for (i = 0; i < nr; i++) {
284                 struct sha1_entry *entry = sha1_list.entry[i];
285                 unsigned char *sha1 = entry->sha1;
286
287                 sha1_list.entry[i] = NULL;
288                 if (fsck_sha1(sha1) < 0)
289                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
290                 free(entry);
291         }
292         sha1_list.nr = 0;
293 }
294
295 static void add_sha1_list(unsigned char *sha1, unsigned long ino)
296 {
297         struct sha1_entry *entry = xmalloc(sizeof(*entry));
298         int nr;
299
300         entry->ino = ino;
301         memcpy(entry->sha1, sha1, 20);
302         nr = sha1_list.nr;
303         if (nr == MAX_SHA1_ENTRIES) {
304                 fsck_sha1_list();
305                 nr = 0;
306         }
307         sha1_list.entry[nr] = entry;
308         sha1_list.nr = ++nr;
309 }
310
/*
 * Scan one fan-out directory of the object store.
 *
 * 'i' is the value of the directory's two-hex-digit name and 'path' is
 * its location.  Every entry whose 38-character name completes a valid
 * 40-digit sha1 is queued with add_sha1_list(); "." and ".." are
 * skipped; anything else is reported on stderr as a bad sha1 file.
 *
 * Returns 0, or the result of error() when the directory cannot be
 * opened.
 */
static int fsck_dir(int i, char *path)
{
	struct dirent *de;
	DIR *dir = opendir(path);

	if (dir == NULL)
		return error("missing sha1 directory '%s'", path);

	for (de = readdir(dir); de != NULL; de = readdir(dir)) {
		char name[100];
		unsigned char sha1[20];
		const char *fname = de->d_name;
		int len = strlen(fname);

		/* Skip the "." and ".." entries. */
		if (len == 1 && fname[0] == '.')
			continue;
		if (len == 2 && fname[0] == '.' && fname[1] == '.')
			continue;

		if (len == 38) {
			/* Prefix the directory's two digits to get all 40. */
			sprintf(name, "%02x", i);
			memcpy(name + 2, fname, len + 1);
			if (get_sha1_hex(name, sha1) >= 0) {
				add_sha1_list(sha1, de->d_ino);
				continue;
			}
		}

		/* Wrong length, or not hexadecimal. */
		fprintf(stderr, "bad sha1 file: %s/%s\n", path, fname);
	}
	closedir(dir);
	return 0;
}
346
347 static int default_refs = 0;
348
349 static int fsck_handle_ref(const char *refname, const unsigned char *sha1)
350 {
351         struct object *obj;
352
353         obj = lookup_object(sha1);
354         if (!obj) {
355                 if (!standalone && has_sha1_file(sha1)) {
356                         default_refs++;
357                         return 0; /* it is in a pack */
358                 }
359                 error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
360                 /* We'll continue with the rest despite the error.. */
361                 return 0;
362         }
363         default_refs++;
364         obj->used = 1;
365         mark_reachable(obj, REACHABLE);
366         return 0;
367 }
368
369 static void get_default_heads(void)
370 {
371         for_each_ref(fsck_handle_ref);
372         if (!default_refs)
373                 die("No default references");
374 }
375
/*
 * Check all loose objects below one object directory.
 *
 * Visits the 256 fan-out subdirectories "00" .. "ff" of 'path' via
 * fsck_dir(), then flushes whatever is still queued through
 * fsck_sha1_list().
 */
static void fsck_object_dir(const char *path)
{
	int i;

	for (i = 0; i < 256; i++) {
		static char dir[4096];
		int len = snprintf(dir, sizeof(dir), "%s/%02x", path, i);

		/*
		 * A path that does not fit cannot name a real directory
		 * for any of the remaining subdirectories either, so
		 * report it once and stop instead of overrunning dir[].
		 */
		if (len < 0 || (size_t)len >= sizeof(dir)) {
			error("object directory path too long: %s", path);
			break;
		}
		fsck_dir(i, dir);
	}
	fsck_sha1_list();
}
386
387 static int fsck_head_link(void)
388 {
389         int fd, count;
390         char hex[40];
391         unsigned char sha1[20];
392         static char path[PATH_MAX], link[PATH_MAX];
393         const char *git_dir;
394
395         git_dir = getenv(GIT_DIR_ENVIRONMENT);
396         if (!git_dir) git_dir = DEFAULT_GIT_DIR_ENVIRONMENT;
397
398         snprintf(path, sizeof(path), "%s/HEAD", git_dir);
399         if (readlink(path, link, sizeof(link)) < 0)
400                 return error("HEAD is not a symlink");
401         if (strncmp("refs/heads/", link, 11))
402                 return error("HEAD points to something strange (%s)", link);
403         fd = open(path, O_RDONLY);
404         if (fd < 0)
405                 return error("HEAD: %s", strerror(errno));
406         count = read(fd, hex, sizeof(hex));
407         close(fd);
408         if (count < 0)
409                 return error("HEAD: %s", strerror(errno));
410         if (count < 40 || get_sha1_hex(hex, sha1))
411                 return error("HEAD: not a valid git pointer");
412         return 0;
413 }
414
415 int main(int argc, char **argv)
416 {
417         int i, heads;
418
419         for (i = 1; i < argc; i++) {
420                 const char *arg = argv[i];
421
422                 if (!strcmp(arg, "--unreachable")) {
423                         show_unreachable = 1;
424                         continue;
425                 }
426                 if (!strcmp(arg, "--tags")) {
427                         show_tags = 1;
428                         continue;
429                 }
430                 if (!strcmp(arg, "--root")) {
431                         show_root = 1;
432                         continue;
433                 }
434                 if (!strcmp(arg, "--cache")) {
435                         keep_cache_objects = 1;
436                         continue;
437                 }
438                 if (!strcmp(arg, "--standalone")) {
439                         standalone = 1;
440                         continue;
441                 }
442                 if (!strcmp(arg, "--full")) {
443                         check_full = 1;
444                         continue;
445                 }
446                 if (!strcmp(arg, "--strict")) {
447                         check_strict = 1;
448                         continue;
449                 }
450                 if (*arg == '-')
451                         usage("git-fsck-objects [--tags] [--root] [[--unreachable] [--cache] [--standalone | --full] [--strict] <head-sha1>*]");
452         }
453
454         if (standalone && check_full)
455                 die("Only one of --standalone or --full can be used.");
456         if (standalone)
457                 putenv("GIT_ALTERNATE_OBJECT_DIRECTORIES=");
458
459         fsck_head_link();
460         fsck_object_dir(get_object_directory());
461         if (check_full) {
462                 struct alternate_object_database *alt;
463                 struct packed_git *p;
464                 prepare_alt_odb();
465                 for (alt = alt_odb_list; alt; alt = alt->next) {
466                         char namebuf[PATH_MAX];
467                         int namelen = alt->name - alt->base;
468                         memcpy(namebuf, alt->base, namelen);
469                         namebuf[namelen - 1] = 0;
470                         fsck_object_dir(namebuf);
471                 }
472                 prepare_packed_git();
473                 for (p = packed_git; p; p = p->next)
474                         /* verify gives error messages itself */
475                         verify_pack(p, 0);
476
477                 for (p = packed_git; p; p = p->next) {
478                         int num = num_packed_objects(p);
479                         for (i = 0; i < num; i++) {
480                                 unsigned char sha1[20];
481                                 nth_packed_object_sha1(p, i, sha1);
482                                 if (fsck_sha1(sha1) < 0)
483                                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
484
485                         }
486                 }
487         }
488
489         heads = 0;
490         for (i = 1; i < argc; i++) {
491                 const char *arg = argv[i]; 
492
493                 if (*arg == '-')
494                         continue;
495
496                 if (!get_sha1(arg, head_sha1)) {
497                         struct object *obj = lookup_object(head_sha1);
498
499                         /* Error is printed by lookup_object(). */
500                         if (!obj)
501                                 continue;
502
503                         obj->used = 1;
504                         mark_reachable(obj, REACHABLE);
505                         heads++;
506                         continue;
507                 }
508                 error("expected sha1, got %s", arg);
509         }
510
511         /*
512          * If we've not been given any explicit head information, do the
513          * default ones from .git/refs. We also consider the index file
514          * in this case (ie this implies --cache).
515          */
516         if (!heads) {
517                 get_default_heads();
518                 keep_cache_objects = 1;
519         }
520
521         if (keep_cache_objects) {
522                 int i;
523                 read_cache();
524                 for (i = 0; i < active_nr; i++) {
525                         struct blob *blob = lookup_blob(active_cache[i]->sha1);
526                         struct object *obj;
527                         if (!blob)
528                                 continue;
529                         obj = &blob->object;
530                         obj->used = 1;
531                         mark_reachable(obj, REACHABLE);
532                 }
533         }
534
535         check_connectivity();
536         return 0;
537 }