549d45af258d0019d1819edea976ed5555f1bf03
[git.git] / sha1_file.c
1 /*
2  * GIT - The information manager from hell
3  *
4  * Copyright (C) Linus Torvalds, 2005
5  *
6  * This handles basic git sha1 object files - packing, unpacking,
7  * creation etc.
8  */
9 #include <stdarg.h>
10 #include <limits.h>
11 #include "cache.h"
12
/*
 * Make sure O_NOATIME is always defined.  When the system headers do
 * not provide it, use the raw value 01000000 on Linux/i386 and
 * Linux/PPC, and 0 (i.e. no extra flag) everywhere else.
 */
#if !defined(O_NOATIME) && defined(__linux__) && (defined(__i386__) || defined(__PPC__))
#define O_NOATIME 01000000
#endif
#if !defined(O_NOATIME)
#define O_NOATIME 0
#endif

/*
 * Extra flag OR-ed into O_RDONLY when map_sha1_file() opens an object
 * file.  It starts out as O_NOATIME and is reset to 0 once an open
 * only succeeds without it.
 */
static unsigned int sha1_file_open_flag = O_NOATIME;
22
/*
 * Decode a single hexadecimal digit (either case).
 *
 * Returns the digit's value (0..15), or ~0 (all bits set) when 'c' is
 * not a hex digit.
 */
static unsigned hexval(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return ~0;
}

/*
 * Convert the first 40 hex characters of 'hex' into the 20 binary
 * bytes of 'sha1'.
 *
 * Returns 0 on success and -1 as soon as a non-hex character is seen.
 * On failure the bytes decoded so far have already been stored in
 * 'sha1'.
 *
 * Each character is validated before the next one is read, so a string
 * shorter than 40 characters (including the empty string) is rejected
 * at its terminating NUL without reading past it.
 */
int get_sha1_hex(const char *hex, unsigned char *sha1)
{
	int i;
	for (i = 0; i < 20; i++) {
		unsigned int hi, lo;

		hi = hexval(hex[0]);
		if (hi & ~0xf)
			return -1;
		/* Only now is it known that hex[1] lies inside the string. */
		lo = hexval(hex[1]);
		if (lo & ~0xf)
			return -1;
		*sha1++ = (hi << 4) | lo;
		hex += 2;
	}
	return 0;
}
46
/*
 * Read a hex SHA1 from the start of the file at 'path' and store its
 * binary form in 'result'.
 *
 * Returns 0 on success, -1 if the file cannot be opened, if a single
 * read() yields fewer than 40 bytes, or if those bytes are not valid
 * hex.
 */
int get_sha1_file(const char *path, unsigned char *result)
{
	char hexbuf[60];
	int nread;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	nread = read(fd, hexbuf, sizeof(hexbuf));
	close(fd);

	/* A failed read (-1) is rejected here as well. */
	return (nread >= 40) ? get_sha1_hex(hexbuf, result) : -1;
}
61
62 int get_sha1(const char *str, unsigned char *sha1)
63 {
64         static char pathname[PATH_MAX];
65         static const char *prefix[] = {
66                 "",
67                 "refs",
68                 "refs/tags",
69                 "refs/heads",
70                 "refs/snap",
71                 NULL
72         };
73         const char *gitdir;
74         const char **p;
75
76         if (!get_sha1_hex(str, sha1))
77                 return 0;
78
79         gitdir = ".git";
80         for (p = prefix; *p; p++) {
81                 snprintf(pathname, sizeof(pathname), "%s/%s/%s", gitdir, *p, str);
82                 if (!get_sha1_file(pathname, sha1))
83                         return 0;
84         }
85
86         return -1;
87 }
88
/*
 * Format a 20-byte SHA1 as 40 lowercase hex characters.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call.  Only the first 40 bytes of that buffer are ever written; the
 * string terminator comes from the buffer's static zero
 * initialization.
 */
char * sha1_to_hex(const unsigned char *sha1)
{
	static char buffer[50];
	static const char hex[] = "0123456789abcdef";
	int i;

	for (i = 0; i < 20; i++) {
		unsigned int byte = sha1[i];

		buffer[2 * i] = hex[byte >> 4];
		buffer[2 * i + 1] = hex[byte & 0xf];
	}
	return buffer;
}
103
/*
 * Write the hex form of 'sha1' into 'pathbuf' using the object-file
 * layout "xx/yyyy...": two hex digits, one byte that is skipped (the
 * caller has already placed the '/' there), then the remaining 38 hex
 * digits.  41 bytes are spanned in total; no terminator is written.
 */
static void fill_sha1_path(char *pathbuf, const unsigned char *sha1)
{
	static const char digits[] = "0123456789abcdef";
	char *out = pathbuf;
	int i;

	for (i = 0; i < 20; i++) {
		if (i == 1)
			out++;	/* step over the directory separator slot */
		*out++ = digits[sha1[i] >> 4];
		*out++ = digits[sha1[i] & 0xf];
	}
}
115
/*
 * Build the path of the object file for 'sha1' inside the primary
 * object directory: "<object directory>/xx/yyyy...".
 *
 * NOTE! The returned pointer refers to a buffer that is allocated once
 * and reused by every call, so the contents change on the next call.
 * strdup() the result if you need to keep it.
 *
 * This is the location used for creating an object.  When reading,
 * an object that is missing here may still be found in one of the
 * alternate object directories.
 */
char *sha1_file_name(const unsigned char *sha1)
{
	static char *name, *base;

	if (base == NULL) {
		/* First call: lay out "<dir>/" plus zeroed room for the hex part. */
		const char *objdir = get_object_directory();
		int dirlen = strlen(objdir);

		base = xmalloc(dirlen + 60);
		memcpy(base, objdir, dirlen);
		memset(base + dirlen, 0, 60);
		name = base + dirlen + 1;
		name[-1] = '/';		/* separator after the directory */
		name[2] = '/';		/* separator after the two-digit fan-out */
	}
	fill_sha1_path(name, sha1);
	return base;
}
143
/*
 * One alternate object directory.  'base' is the start of a path
 * buffer and 'name' points inside that same buffer at the position
 * where the hex object name is filled in.
 */
struct alternate_object_database {
	char *base;
	char *name;
};

/* Registry of alternates; stays NULL until prepare_alt_odb() has run. */
static struct alternate_object_database *alt_odb;
148
149 /*
150  * Prepare alternate object database registry.
151  * alt_odb points at an array of struct alternate_object_database.
152  * This array is terminated with an element that has both its base
153  * and name set to NULL.  alt_odb[n] comes from n'th non-empty
154  * element from colon separated $SHA1_FILE_DIRECTORIES environment
155  * variable, and its base points at a statically allocated buffer
156  * that contains "/the/directory/corresponding/to/.git/objects/...",
157  * while its name points just after the slash at the end of
158  * ".git/objects/" in the example above, and has enough space to hold
159  * 40-byte hex SHA1, an extra slash for the first level indirection,
160  * and the terminating NUL.
161  * This function allocates the alt_odb array and all the strings
162  * pointed by base fields of the array elements with one xmalloc();
163  * the string pool immediately follows the array.
164  */
165 static void prepare_alt_odb(void)
166 {
167         int pass, totlen, i;
168         const char *cp, *last;
169         char *op = 0;
170         const char *alt = getenv(ALTERNATE_DB_ENVIRONMENT) ? : "";
171
172         /* The first pass counts how large an area to allocate to
173          * hold the entire alt_odb structure, including array of
174          * structs and path buffers for them.  The second pass fills
175          * the structure and prepares the path buffers for use by
176          * fill_sha1_path().
177          */
178         for (totlen = pass = 0; pass < 2; pass++) {
179                 last = alt;
180                 i = 0;
181                 do {
182                         cp = strchr(last, ':') ? : last + strlen(last);
183                         if (last != cp) {
184                                 /* 43 = 40-byte + 2 '/' + terminating NUL */
185                                 int pfxlen = cp - last;
186                                 int entlen = pfxlen + 43;
187                                 if (pass == 0)
188                                         totlen += entlen;
189                                 else {
190                                         alt_odb[i].base = op;
191                                         alt_odb[i].name = op + pfxlen + 1;
192                                         memcpy(op, last, pfxlen);
193                                         op[pfxlen] = op[pfxlen + 3] = '/';
194                                         op[entlen-1] = 0;
195                                         op += entlen;
196                                 }
197                                 i++;
198                         }
199                         while (*cp && *cp == ':')
200                                 cp++;
201                         last = cp;
202                 } while (*cp);
203                 if (pass)
204                         break;
205                 alt_odb = xmalloc(sizeof(*alt_odb) * (i + 1) + totlen);
206                 alt_odb[i].base = alt_odb[i].name = 0;
207                 op = (char*)(&alt_odb[i+1]);
208         }
209 }
210
211 static char *find_sha1_file(const unsigned char *sha1, struct stat *st)
212 {
213         int i;
214         char *name = sha1_file_name(sha1);
215
216         if (!stat(name, st))
217                 return name;
218         if (!alt_odb)
219                 prepare_alt_odb();
220         for (i = 0; (name = alt_odb[i].name) != NULL; i++) {
221                 fill_sha1_path(name, sha1);
222                 if (!stat(alt_odb[i].base, st))
223                         return alt_odb[i].base;
224         }
225         return NULL;
226 }
227
228 int check_sha1_signature(unsigned char *sha1, void *map, unsigned long size, const char *type)
229 {
230         char header[100];
231         unsigned char real_sha1[20];
232         SHA_CTX c;
233
234         SHA1_Init(&c);
235         SHA1_Update(&c, header, 1+sprintf(header, "%s %lu", type, size));
236         SHA1_Update(&c, map, size);
237         SHA1_Final(real_sha1, &c);
238         return memcmp(sha1, real_sha1, 20) ? -1 : 0;
239 }
240
241 void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
242 {
243         struct stat st;
244         void *map;
245         int fd;
246         char *filename = find_sha1_file(sha1, &st);
247
248         if (!filename) {
249                 error("cannot map sha1 file %s", sha1_to_hex(sha1));
250                 return NULL;
251         }
252
253         fd = open(filename, O_RDONLY | sha1_file_open_flag);
254         if (fd < 0) {
255                 /* See if it works without O_NOATIME */
256                 switch (sha1_file_open_flag) {
257                 default:
258                         fd = open(filename, O_RDONLY);
259                         if (fd >= 0)
260                                 break;
261                 /* Fallthrough */
262                 case 0:
263                         perror(filename);
264                         return NULL;
265                 }
266
267                 /* If it failed once, it will probably fail again. Stop using O_NOATIME */
268                 sha1_file_open_flag = 0;
269         }
270         map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
271         close(fd);
272         if (-1 == (int)(long)map)
273                 return NULL;
274         *size = st.st_size;
275         return map;
276 }
277
278 void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size)
279 {
280         int ret, bytes;
281         z_stream stream;
282         char buffer[8192];
283         char *buf;
284
285         /* Get the data stream */
286         memset(&stream, 0, sizeof(stream));
287         stream.next_in = map;
288         stream.avail_in = mapsize;
289         stream.next_out = buffer;
290         stream.avail_out = sizeof(buffer);
291
292         inflateInit(&stream);
293         ret = inflate(&stream, 0);
294         if (ret < Z_OK)
295                 return NULL;
296         if (sscanf(buffer, "%10s %lu", type, size) != 2)
297                 return NULL;
298
299         bytes = strlen(buffer) + 1;
300         buf = xmalloc(*size);
301
302         memcpy(buf, buffer + bytes, stream.total_out - bytes);
303         bytes = stream.total_out - bytes;
304         if (bytes < *size && ret == Z_OK) {
305                 stream.next_out = buf + bytes;
306                 stream.avail_out = *size - bytes;
307                 while (inflate(&stream, Z_FINISH) == Z_OK)
308                         /* nothing */;
309         }
310         inflateEnd(&stream);
311         return buf;
312 }
313
/*
 * Read the object named by 'sha1'.
 *
 * Maps the object file, unpacks it and drops the mapping again.
 * Returns whatever unpack_sha1_file() returns (with 'type' and
 * '*size' filled in by it), or NULL when the file cannot be mapped.
 */
void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size)
{
	unsigned long mapsize;
	void *contents;
	void *map = map_sha1_file(sha1, &mapsize);

	if (!map)
		return NULL;

	contents = unpack_sha1_file(map, mapsize, type, size);
	munmap(map, mapsize);
	return contents;
}
327
/*
 * Read the object 'sha1', following references until an object of
 * 'required_type' is reached.
 *
 * A "commit" is followed through its leading "tree <hex>" line and a
 * "tag" through its leading "object <hex>" line.  Any other type that
 * does not match, an unreadable object, or an object that does not
 * start with the expected reference line ends the search with NULL.
 *
 * On success returns the object's contents (the caller frees them),
 * stores their length in '*size' and, if 'actual_sha1_return' is not
 * NULL, the SHA1 of the object actually returned.
 */
void *read_object_with_reference(const unsigned char *sha1,
				 const unsigned char *required_type,
				 unsigned long *size,
				 unsigned char *actual_sha1_return)
{
	char type[20];
	void *buffer;
	unsigned long isize;
	unsigned char actual_sha1[20];

	memcpy(actual_sha1, sha1, 20);
	while (1) {
		unsigned long ref_length;
		const char *ref_type;

		buffer = read_sha1_file(actual_sha1, type, &isize);
		if (!buffer)
			return NULL;
		if (!strcmp(type, (const char *)required_type)) {
			*size = isize;
			if (actual_sha1_return)
				memcpy(actual_sha1_return, actual_sha1, 20);
			return buffer;
		}
		/* Handle references */
		else if (!strcmp(type, "commit"))
			ref_type = "tree ";
		else if (!strcmp(type, "tag"))
			ref_type = "object ";
		else {
			free(buffer);
			return NULL;
		}
		ref_length = strlen(ref_type);

		/*
		 * The object must be long enough to hold the reference
		 * keyword plus 40 hex digits before either is examined.
		 */
		if (isize < ref_length + 40 ||
		    memcmp(buffer, ref_type, ref_length) ||
		    get_sha1_hex((const char *)buffer + ref_length, actual_sha1)) {
			free(buffer);
			return NULL;
		}
		/*
		 * Now we have the ID of the referred-to object in
		 * actual_sha1, so the referring object is no longer
		 * needed.  Check again.
		 */
		free(buffer);
	}
}
372
373 int write_sha1_file(char *buf, unsigned long len, const char *type, unsigned char *returnsha1)
374 {
375         int size;
376         char *compressed;
377         z_stream stream;
378         unsigned char sha1[20];
379         SHA_CTX c;
380         char *filename;
381         static char tmpfile[PATH_MAX];
382         char hdr[50];
383         int fd, hdrlen, ret;
384
385         /* Generate the header */
386         hdrlen = sprintf(hdr, "%s %lu", type, len)+1;
387
388         /* Sha1.. */
389         SHA1_Init(&c);
390         SHA1_Update(&c, hdr, hdrlen);
391         SHA1_Update(&c, buf, len);
392         SHA1_Final(sha1, &c);
393
394         if (returnsha1)
395                 memcpy(returnsha1, sha1, 20);
396
397         filename = sha1_file_name(sha1);
398         fd = open(filename, O_RDONLY);
399         if (fd >= 0) {
400                 /*
401                  * FIXME!!! We might do collision checking here, but we'd
402                  * need to uncompress the old file and check it. Later.
403                  */
404                 close(fd);
405                 return 0;
406         }
407
408         if (errno != ENOENT) {
409                 fprintf(stderr, "sha1 file %s: %s", filename, strerror(errno));
410                 return -1;
411         }
412
413         snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX", get_object_directory());
414
415         fd = mkstemp(tmpfile);
416         if (fd < 0) {
417                 fprintf(stderr, "unable to create temporary sha1 filename %s: %s", tmpfile, strerror(errno));
418                 return -1;
419         }
420
421         /* Set it up */
422         memset(&stream, 0, sizeof(stream));
423         deflateInit(&stream, Z_BEST_COMPRESSION);
424         size = deflateBound(&stream, len+hdrlen);
425         compressed = xmalloc(size);
426
427         /* Compress it */
428         stream.next_out = compressed;
429         stream.avail_out = size;
430
431         /* First header.. */
432         stream.next_in = hdr;
433         stream.avail_in = hdrlen;
434         while (deflate(&stream, 0) == Z_OK)
435                 /* nothing */
436
437         /* Then the data itself.. */
438         stream.next_in = buf;
439         stream.avail_in = len;
440         while (deflate(&stream, Z_FINISH) == Z_OK)
441                 /* nothing */;
442         deflateEnd(&stream);
443         size = stream.total_out;
444
445         if (write(fd, compressed, size) != size)
446                 die("unable to write file");
447         fchmod(fd, 0444);
448         close(fd);
449         free(compressed);
450
451         ret = link(tmpfile, filename);
452         if (ret < 0) {
453                 ret = errno;
454
455                 /*
456                  * Coda hack - coda doesn't like cross-directory links,
457                  * so we fall back to a rename, which will mean that it
458                  * won't be able to check collisions, but that's not a
459                  * big deal.
460                  *
461                  * When this succeeds, we just return 0. We have nothing
462                  * left to unlink.
463                  */
464                 if (ret == EXDEV && !rename(tmpfile, filename))
465                         return 0;
466         }
467         unlink(tmpfile);
468         if (ret) {
469                 if (ret != EEXIST) {
470                         fprintf(stderr, "unable to write sha1 filename %s: %s", filename, strerror(ret));
471                         return -1;
472                 }
473                 /* FIXME!!! Collision check here ? */
474         }
475
476         return 0;
477 }
478
479 int write_sha1_from_fd(const unsigned char *sha1, int fd)
480 {
481         char *filename = sha1_file_name(sha1);
482
483         int local;
484         z_stream stream;
485         unsigned char real_sha1[20];
486         char buf[4096];
487         char discard[4096];
488         int ret;
489         SHA_CTX c;
490
491         local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);
492
493         if (local < 0)
494                 return error("Couldn't open %s\n", filename);
495
496         memset(&stream, 0, sizeof(stream));
497
498         inflateInit(&stream);
499
500         SHA1_Init(&c);
501
502         do {
503                 ssize_t size;
504                 size = read(fd, buf, 4096);
505                 if (size <= 0) {
506                         close(local);
507                         unlink(filename);
508                         if (!size)
509                                 return error("Connection closed?");
510                         perror("Reading from connection");
511                         return -1;
512                 }
513                 write(local, buf, size);
514                 stream.avail_in = size;
515                 stream.next_in = buf;
516                 do {
517                         stream.next_out = discard;
518                         stream.avail_out = sizeof(discard);
519                         ret = inflate(&stream, Z_SYNC_FLUSH);
520                         SHA1_Update(&c, discard, sizeof(discard) -
521                                     stream.avail_out);
522                 } while (stream.avail_in && ret == Z_OK);
523                 
524         } while (ret == Z_OK);
525         inflateEnd(&stream);
526
527         close(local);
528         SHA1_Final(real_sha1, &c);
529         if (ret != Z_STREAM_END) {
530                 unlink(filename);
531                 return error("File %s corrupted", sha1_to_hex(sha1));
532         }
533         if (memcmp(sha1, real_sha1, 20)) {
534                 unlink(filename);
535                 return error("File %s has bad hash\n", sha1_to_hex(sha1));
536         }
537         
538         return 0;
539 }
540
/*
 * Report whether an object file for 'sha1' exists in the primary
 * object directory or in one of the alternates.  Returns 1 if so and
 * 0 otherwise.
 */
int has_sha1_file(const unsigned char *sha1)
{
	struct stat st;

	if (find_sha1_file(sha1, &st) != NULL)
		return 1;
	return 0;
}
546
/*
 * Store the contents of the open file 'fd' as a "blob" object and put
 * the object's name in 'sha1'.
 *
 * 'st' supplies the file size.  'fd' is always closed, on failure as
 * well as on success.  Returns -1 if the file cannot be mapped,
 * otherwise the result of write_sha1_file().
 */
int index_fd(unsigned char *sha1, int fd, struct stat *st)
{
	unsigned long size = st->st_size;
	void *buf = "";		/* stands in for the contents of an empty file */
	int ret;

	/* A zero-length file is not mapped at all. */
	if (size)
		buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (buf == MAP_FAILED)
		return -1;

	ret = write_sha1_file(buf, size, "blob", sha1);
	if (size)
		munmap(buf, size);
	return ret;
}