[PATCH] git-tar-tree: add symlink support
[git.git] / tar-tree.c
1 #include <time.h>
2 #include "cache.h"
3
/*
 * Output geometry: the archive is built from RECORDSIZE-byte records,
 * which are buffered and written out BLOCKSIZE bytes (20 records) at a time.
 */
4 #define RECORDSIZE      (512)
5 #define BLOCKSIZE       (RECORDSIZE * 20)
6
/*
 * Values stored in the typeflag byte of a header.  TYPEFLAG_AUTO is not
 * written to the archive; it asks write_header() to derive the type
 * (directory, symlink or regular file) from the entry's mode.
 */
7 #define TYPEFLAG_AUTO           '\0'
8 #define TYPEFLAG_REG            '0'
9 #define TYPEFLAG_LNK            '2'
10 #define TYPEFLAG_DIR            '5'
11 #define TYPEFLAG_GLOBAL_HEADER  'g'
12 #define TYPEFLAG_EXT_HEADER     'x'
13
/* bit flags selecting which records an extended header has to carry */
14 #define EXT_HEADER_PATH         1
15 #define EXT_HEADER_LINKPATH     2
16
17 static const char *tar_tree_usage = "tar-tree <key> [basedir]";
18
/* output buffer and the number of bytes of it that are currently in use */
19 static char block[BLOCKSIZE];
20 static unsigned long offset;
21
/* optional directory name (argv[2]) put in front of every archived path */
22 static const char *basedir;
/* timestamp stored in every header: the commit time, else the current time */
23 static time_t archive_time;
24
/*
 * One directory level of the path leading to the entry being archived.
 * The levels form a singly linked list; prev points at the enclosing
 * (parent) level and is NULL for the outermost one.
 */
25 struct path_prefix {
26         struct path_prefix *prev;
27         const char *name;
28 };
29
/*
 * Write all `size` bytes starting at `buf` to standard output (fd 1).
 *
 * Tries hard to write: short writes are continued and EAGAIN/EINTR are
 * retried.  The function either succeeds or does not return: EPIPE
 * (reader went away) ends the program quietly with status 0, any other
 * error and a zero-byte write are fatal via die().
 */
static void reliable_write(void *buf, unsigned long size)
{
	/* byte pointer: arithmetic on void * is not standard C */
	const char *p = buf;

	while (size > 0) {
		ssize_t ret = write(1, p, size);
		if (ret < 0) {
			/* transient conditions: simply try again */
			if (errno == EAGAIN || errno == EINTR)
				continue;
			if (errno == EPIPE)
				exit(0);
			die("tar-tree: %s", strerror(errno));
		} else if (!ret) {
			die("tar-tree: disk full?");
		}
		size -= ret;
		p += ret;
	}
}
48
49 /* writes out the whole block, but only if it is full */
50 static void write_if_needed(void)
51 {
52         if (offset == BLOCKSIZE) {
53                 reliable_write(block, BLOCKSIZE);
54                 offset = 0;
55         }
56 }
57
58 /* acquire the next record from the buffer; user must call write_if_needed() */
59 static char *get_record(void)
60 {
61         char *p = block + offset;
62         memset(p, 0, RECORDSIZE);
63         offset += RECORDSIZE;
64         return p;
65 }
66
67 /*
68  * The end of tar archives is marked by 1024 nul bytes and after that
69  * follows the rest of the block (if any).
70  */
71 static void write_trailer(void)
72 {
73         memset(block + offset, 0, RECORDSIZE);
74         offset += RECORDSIZE;
75         write_if_needed();
76         memset(block + offset, 0, RECORDSIZE);
77         offset += RECORDSIZE;
78         write_if_needed();
79         if (offset) {
80                 memset(block + offset, 0, BLOCKSIZE - offset);
81                 reliable_write(block, BLOCKSIZE);
82                 offset = 0;
83         }
84 }
85
86 /*
87  * queues up writes, so that all our write(2) calls write exactly one
88  * full block; pads writes to RECORDSIZE
89  */
90 static void write_blocked(void *buf, unsigned long size)
91 {
92         unsigned long tail;
93
94         if (offset) {
95                 unsigned long chunk = BLOCKSIZE - offset;
96                 if (size < chunk)
97                         chunk = size;
98                 memcpy(block + offset, buf, chunk);
99                 size -= chunk;
100                 offset += chunk;
101                 buf += chunk;
102                 write_if_needed();
103         }
104         while (size >= BLOCKSIZE) {
105                 reliable_write(buf, BLOCKSIZE);
106                 size -= BLOCKSIZE;
107                 buf += BLOCKSIZE;
108         }
109         if (size) {
110                 memcpy(block + offset, buf, size);
111                 buf += size;
112                 offset += size;
113         }
114         tail = offset % RECORDSIZE;
115         if (tail)  {
116                 memset(block + offset, 0, RECORDSIZE - tail);
117                 offset += RECORDSIZE - tail;
118         }
119         write_if_needed();
120 }
121
/*
 * Copy the characters of `s` (without its terminating nul) to the
 * position `*p` points at and advance `*p` past the copied bytes.
 */
static void append_string(char **p, const char *s)
{
	unsigned int n = strlen(s);
	char *dst = *p;

	*p = dst + n;
	memcpy(dst, s, n);
}
128
/* Store the single character `c` at `*p` and advance `*p` by one. */
static void append_char(char **p, char c)
{
	*(*p)++ = c;
}
134
135 static void append_path_prefix(char **buffer, struct path_prefix *prefix)
136 {
137         if (!prefix)
138                 return;
139         append_path_prefix(buffer, prefix->prev);
140         append_string(buffer, prefix->name);
141         append_char(buffer, '/');
142 }
143
144 static unsigned int path_prefix_len(struct path_prefix *prefix)
145 {
146         if (!prefix)
147                 return 0;
148         return path_prefix_len(prefix->prev) + strlen(prefix->name) + 1;
149 }
150
151 static void append_path(char **p, int is_dir, const char *basepath,
152                         struct path_prefix *prefix, const char *path)
153 {
154         if (basepath) {
155                 append_string(p, basepath);
156                 append_char(p, '/');
157         }
158         append_path_prefix(p, prefix);
159         append_string(p, path);
160         if (is_dir)
161                 append_char(p, '/');
162 }
163
/*
 * Number of bytes append_path() writes for the same arguments; the
 * individual parts are counted exactly the way they are appended there.
 */
static unsigned int path_len(int is_dir, const char *basepath,
			     struct path_prefix *prefix, const char *path)
{
	unsigned int total = path_prefix_len(prefix) + strlen(path);

	if (basepath)
		total += strlen(basepath) + 1;	/* basepath and its '/' */
	return is_dir ? total + 1 : total;	/* trailing '/' of directories */
}
175
/*
 * Write the leading "<size> <keyword>=" part of an extended header
 * record to `*p` and advance `*p` past it.  sprintf() also stores a
 * terminating nul right behind the text; `*p` is left pointing at it.
 */
static void append_extended_header_prefix(char **p, unsigned int size,
					  const char *keyword)
{
	*p += sprintf(*p, "%u %s=", size, keyword);
}
182
/*
 * Total length of the extended header record "%u %s=%s\n" for `keyword`
 * and a value of `valuelen` bytes.
 *
 * The leading number is the length of the whole record, its own digits
 * included, so the digit count is raised until the total fits into
 * that many digits.  This works for records of any size, not only for
 * those shorter than 1000 bytes.
 */
static unsigned int extended_header_len(const char *keyword,
					unsigned int valuelen)
{
	/* everything but the number: ' ' keyword '=' value '\n' */
	unsigned int rest = 1 + (unsigned int)strlen(keyword) + 1 + valuelen + 1;
	unsigned int digits = 1;
	/* smallest number that no longer fits into `digits` digits */
	unsigned long long limit = 10;

	while (rest + digits >= limit) {
		digits++;
		limit *= 10;
	}
	return rest + digits;
}
194
/*
 * Append one complete extended header record
 * "<size> <keyword>=<value>\n" to `*p`, taking `len` bytes from
 * `value`, and advance `*p` past the record.
 */
static void append_extended_header(char **p, const char *keyword,
				   const char *value, unsigned int len)
{
	char *dst;

	append_extended_header_prefix(p, extended_header_len(keyword, len),
				      keyword);
	dst = *p;
	memcpy(dst, value, len);
	dst[len] = '\n';
	*p = dst + len + 1;
}
204
205 static void write_header(const char *, char, const char *, struct path_prefix *,
206                          const char *, unsigned int, void *, unsigned long);
207
208 /* stores a pax extended header directly in the block buffer */
209 static void write_extended_header(const char *headerfilename, int is_dir,
210                                   unsigned int flags, const char *basepath,
211                                   struct path_prefix *prefix,
212                                   const char *path, unsigned int namelen,
213                                   void *content, unsigned int contentsize)
214 {
215         char *p;
216         unsigned int pathlen, size, linkpathlen = 0;
217
218         size = pathlen = extended_header_len("path", namelen);
219         if (flags & EXT_HEADER_LINKPATH) {
220                 linkpathlen = extended_header_len("linkpath", contentsize);
221                 size += linkpathlen;
222         }
223         if (size > RECORDSIZE)
224                 die("tar-tree: extended header too big, wtf?");
225         write_header(NULL, TYPEFLAG_EXT_HEADER, NULL, NULL, headerfilename,
226                      0100600, NULL, size);
227
228         p = get_record();
229         append_extended_header_prefix(&p, pathlen, "path");
230         append_path(&p, is_dir, basepath, prefix, path);
231         append_char(&p, '\n');
232         if (flags & EXT_HEADER_LINKPATH)
233                 append_extended_header(&p, "linkpath", content, contentsize);
234         write_if_needed();
235 }
236
237 static void write_global_extended_header(const char *sha1)
238 {
239         char *p;
240         unsigned int size;
241
242         size = extended_header_len("comment", 40);
243         write_header(NULL, TYPEFLAG_GLOBAL_HEADER, NULL, NULL,
244                      "pax_global_header", 0100600, NULL, size);
245
246         p = get_record();
247         append_extended_header(&p, "comment", sha1_to_hex(sha1), 40);
248         write_if_needed();
249 }
250
251 /* stores a ustar header directly in the block buffer */
252 static void write_header(const char *sha1, char typeflag, const char *basepath,
253                          struct path_prefix *prefix, const char *path,
254                          unsigned int mode, void *buffer, unsigned long size)
255 {
256         unsigned int namelen; 
257         char *header = NULL;
258         unsigned int checksum = 0;
259         int i;
260         unsigned int ext_header = 0;
261
262         if (typeflag == TYPEFLAG_AUTO) {
263                 if (S_ISDIR(mode))
264                         typeflag = TYPEFLAG_DIR;
265                 else if (S_ISLNK(mode))
266                         typeflag = TYPEFLAG_LNK;
267                 else
268                         typeflag = TYPEFLAG_REG;
269         }
270
271         namelen = path_len(S_ISDIR(mode), basepath, prefix, path);
272         if (namelen > 500)
273                 die("tar-tree: name too log of object %s\n", sha1_to_hex(sha1));
274         else if (namelen > 100)
275                 ext_header |= EXT_HEADER_PATH;
276         if (typeflag == TYPEFLAG_LNK && size > 100)
277                 ext_header |= EXT_HEADER_LINKPATH;
278
279         /* the extended header must be written before the normal one */
280         if (ext_header) {
281                 char headerfilename[51];
282                 sprintf(headerfilename, "%s.paxheader", sha1_to_hex(sha1));
283                 write_extended_header(headerfilename, S_ISDIR(mode),
284                                       ext_header, basepath, prefix, path,
285                                       namelen, buffer, size);
286         }
287
288         header = get_record();
289
290         if (ext_header) {
291                 sprintf(header, "%s.data", sha1_to_hex(sha1));
292         } else {
293                 char *p = header;
294                 append_path(&p, S_ISDIR(mode), basepath, prefix, path);
295         }
296
297         if (typeflag == TYPEFLAG_LNK) {
298                 if (ext_header & EXT_HEADER_LINKPATH) {
299                         sprintf(&header[157], "see %s.paxheader",
300                                 sha1_to_hex(sha1));
301                 } else {
302                         if (buffer)
303                                 strncpy(&header[157], buffer, size);
304                 }
305         }
306
307         if (S_ISDIR(mode))
308                 mode |= 0755;   /* GIT doesn't store permissions of dirs */
309         if (S_ISLNK(mode))
310                 mode |= 0777;   /* ... nor of symlinks */
311         sprintf(&header[100], "%07o", mode & 07777);
312
313         /* XXX: should we provide more meaningful info here? */
314         sprintf(&header[108], "%07o", 0);       /* uid */
315         sprintf(&header[116], "%07o", 0);       /* gid */
316         strncpy(&header[265], "git", 31);       /* uname */
317         strncpy(&header[297], "git", 31);       /* gname */
318
319         if (S_ISDIR(mode) || S_ISLNK(mode))
320                 size = 0;
321         sprintf(&header[124], "%011lo", size);
322         sprintf(&header[136], "%011lo", archive_time);
323
324         header[156] = typeflag;
325
326         memcpy(&header[257], "ustar", 6);
327         memcpy(&header[263], "00", 2);
328
329         printf(&header[329], "%07o", 0);        /* devmajor */
330         printf(&header[337], "%07o", 0);        /* devminor */
331
332         memset(&header[148], ' ', 8);
333         for (i = 0; i < RECORDSIZE; i++)
334                 checksum += header[i];
335         sprintf(&header[148], "%07o", checksum & 0x1fffff);
336
337         write_if_needed();
338 }
339
340 static void traverse_tree(void *buffer, unsigned long size,
341                           struct path_prefix *prefix)
342 {
343         struct path_prefix this_prefix;
344         this_prefix.prev = prefix;
345
346         while (size) {
347                 int namelen = strlen(buffer)+1;
348                 void *eltbuf;
349                 char elttype[20];
350                 unsigned long eltsize;
351                 unsigned char *sha1 = buffer + namelen;
352                 char *path = strchr(buffer, ' ') + 1;
353                 unsigned int mode;
354
355                 if (size < namelen + 20 || sscanf(buffer, "%o", &mode) != 1)
356                         die("corrupt 'tree' file");
357                 buffer = sha1 + 20;
358                 size -= namelen + 20;
359
360                 eltbuf = read_sha1_file(sha1, elttype, &eltsize);
361                 if (!eltbuf)
362                         die("cannot read %s", sha1_to_hex(sha1));
363                 write_header(sha1, TYPEFLAG_AUTO, basedir, prefix, path,
364                              mode, eltbuf, eltsize);
365                 if (!strcmp(elttype, "tree")) {
366                         this_prefix.name = path;
367                         traverse_tree(eltbuf, eltsize, &this_prefix);
368                 } else if (!strcmp(elttype, "blob") && !S_ISLNK(mode)) {
369                         write_blocked(eltbuf, eltsize);
370                 }
371                 free(eltbuf);
372         }
373 }
374
/*
 * Get the commit time from the committer line of a commit object.
 *
 * buffer - contents of the commit object; it is modified: the newline
 *          ending each inspected line is replaced by a nul byte
 * size   - number of bytes in `buffer`
 *
 * Only the lines up to the first empty line are looked at, and never
 * more than `size` bytes.  Returns the number following the last '>'
 * of the "committer " line, or 0 if there is no such line or the
 * number is not followed by a space.
 */
time_t commit_time(void * buffer, unsigned long size)
{
	time_t result = 0;
	char *p = buffer;

	while (size > 0) {
		char *endp = memchr(p, '\n', size);
		if (!endp || endp == p)
			break;
		*endp = '\0';
		if (endp - p > 10 && !memcmp(p, "committer ", 10)) {
			char *numend;
			char *nump = strrchr(p, '>');
			if (!nump)
				break;
			nump++;
			result = strtoul(nump, &numend, 10);
			if (*numend != ' ')
				result = 0;
			break;
		}
		/* the line and its newline have been consumed */
		size -= (unsigned long)(endp - p) + 1;
		p = endp + 1;
	}
	return result;
}
401
402 int main(int argc, char **argv)
403 {
404         unsigned char sha1[20];
405         unsigned char commit_sha1[20];
406         void *buffer;
407         unsigned long size;
408
409         switch (argc) {
410         case 3:
411                 basedir = argv[2];
412                 /* FALLTHROUGH */
413         case 2:
414                 if (get_sha1(argv[1], sha1) < 0)
415                         usage(tar_tree_usage);
416                 break;
417         default:
418                 usage(tar_tree_usage);
419         }
420
421         sha1_file_directory = getenv(DB_ENVIRONMENT);
422         if (!sha1_file_directory)
423                 sha1_file_directory = DEFAULT_DB_ENVIRONMENT;
424
425         buffer = read_object_with_reference(sha1, "commit", &size, commit_sha1);
426         if (buffer) {
427                 write_global_extended_header(commit_sha1);
428                 archive_time = commit_time(buffer, size);
429                 free(buffer);
430         }
431         buffer = read_object_with_reference(sha1, "tree", &size, NULL);
432         if (!buffer)
433                 die("not a reference to a tag, commit or tree object: %s",
434                     sha1_to_hex(sha1));
435         if (!archive_time)
436                 archive_time = time(NULL);
437         if (basedir)
438                 write_header("0", TYPEFLAG_DIR, NULL, NULL, basedir, 040755,
439                              NULL, 0);
440         traverse_tree(buffer, size, NULL);
441         free(buffer);
442         write_trailer();
443         return 0;
444 }