[PATCH] git-tar-tree: make file contents accessible to write_header()
[git.git] / tar-tree.c
1 #include <time.h>
2 #include "cache.h"
3
/* the archive is built from 512-byte records, written out 20 at a time */
#define RECORDSIZE      512
#define BLOCKSIZE       (20 * RECORDSIZE)

/* values for the typeflag byte of a header record */
#define TYPEFLAG_AUTO           '\0'    /* write_header() picks DIR or REG from the mode */
#define TYPEFLAG_REG            '0'
#define TYPEFLAG_DIR            '5'
#define TYPEFLAG_GLOBAL_HEADER  'g'
#define TYPEFLAG_EXT_HEADER     'x'

static const char *tar_tree_usage = "tar-tree <key> [basedir]";

/* output buffer and the number of bytes currently queued in it */
static char block[BLOCKSIZE];
static unsigned long offset;

/* optional leading directory of every member name (second command line argument) */
static const char *basedir;
/* modification time stored in every header */
static time_t archive_time;
20
/*
 * One directory level of the path leading to the entry being archived.
 * The levels form a singly linked list running from the innermost
 * directory back towards the top of the tree.
 */
struct path_prefix {
        struct path_prefix *prev;       /* enclosing level, NULL at the top */
        const char *name;               /* name of this directory level */
};
25
/*
 * Writes all of buf to file descriptor 1, looping over short writes.
 * Either succeeds or does not return:
 *   - EAGAIN and EINTR are transient, the write is simply retried
 *   - EPIPE terminates the program with exit status 0
 *   - any other error, or a write that makes no progress, is fatal
 */
static void reliable_write(void *buf, unsigned long size)
{
        /* byte pointer: ISO C has no arithmetic on void pointers */
        const char *p = buf;

        while (size > 0) {
                long ret = write(1, p, size);
                if (ret < 0) {
                        if (errno == EAGAIN || errno == EINTR)
                                continue;
                        if (errno == EPIPE)
                                exit(0);
                        die("tar-tree: %s", strerror(errno));
                } else if (!ret) {
                        die("tar-tree: disk full?");
                }
                size -= ret;
                p += ret;
        }
}
44
45 /* writes out the whole block, but only if it is full */
46 static void write_if_needed(void)
47 {
48         if (offset == BLOCKSIZE) {
49                 reliable_write(block, BLOCKSIZE);
50                 offset = 0;
51         }
52 }
53
54 /* acquire the next record from the buffer; user must call write_if_needed() */
55 static char *get_record(void)
56 {
57         char *p = block + offset;
58         memset(p, 0, RECORDSIZE);
59         offset += RECORDSIZE;
60         return p;
61 }
62
63 /*
64  * The end of tar archives is marked by 1024 nul bytes and after that
65  * follows the rest of the block (if any).
66  */
67 static void write_trailer(void)
68 {
69         memset(block + offset, 0, RECORDSIZE);
70         offset += RECORDSIZE;
71         write_if_needed();
72         memset(block + offset, 0, RECORDSIZE);
73         offset += RECORDSIZE;
74         write_if_needed();
75         if (offset) {
76                 memset(block + offset, 0, BLOCKSIZE - offset);
77                 reliable_write(block, BLOCKSIZE);
78                 offset = 0;
79         }
80 }
81
82 /*
83  * queues up writes, so that all our write(2) calls write exactly one
84  * full block; pads writes to RECORDSIZE
85  */
86 static void write_blocked(void *buf, unsigned long size)
87 {
88         unsigned long tail;
89
90         if (offset) {
91                 unsigned long chunk = BLOCKSIZE - offset;
92                 if (size < chunk)
93                         chunk = size;
94                 memcpy(block + offset, buf, chunk);
95                 size -= chunk;
96                 offset += chunk;
97                 buf += chunk;
98                 write_if_needed();
99         }
100         while (size >= BLOCKSIZE) {
101                 reliable_write(buf, BLOCKSIZE);
102                 size -= BLOCKSIZE;
103                 buf += BLOCKSIZE;
104         }
105         if (size) {
106                 memcpy(block + offset, buf, size);
107                 buf += size;
108                 offset += size;
109         }
110         tail = offset % RECORDSIZE;
111         if (tail)  {
112                 memset(block + offset, 0, RECORDSIZE - tail);
113                 offset += RECORDSIZE - tail;
114         }
115         write_if_needed();
116 }
117
/*
 * Copies the characters of s (without the terminating nul) to *p and
 * advances *p past them.
 */
static void append_string(char **p, const char *s)
{
        char *dst = *p;

        while (*s)
                *dst++ = *s++;
        *p = dst;
}
124
/* stores c at *p and advances *p by one */
static void append_char(char **p, char c)
{
        *(*p)++ = c;
}
130
131 static void append_path_prefix(char **buffer, struct path_prefix *prefix)
132 {
133         if (!prefix)
134                 return;
135         append_path_prefix(buffer, prefix->prev);
136         append_string(buffer, prefix->name);
137         append_char(buffer, '/');
138 }
139
140 static unsigned int path_prefix_len(struct path_prefix *prefix)
141 {
142         if (!prefix)
143                 return 0;
144         return path_prefix_len(prefix->prev) + strlen(prefix->name) + 1;
145 }
146
/*
 * Appends the full member name to *p: "<basepath>/" if basepath is given,
 * then all prefix levels, then path, and a trailing slash for directories.
 */
static void append_path(char **p, int is_dir, const char *basepath,
                        struct path_prefix *prefix, const char *path)
{
        if (basepath != NULL) {
                append_string(p, basepath);
                append_char(p, '/');
        }

        append_path_prefix(p, prefix);
        append_string(p, path);

        if (is_dir != 0)
                append_char(p, '/');
}
159
/* returns the number of bytes append_path() writes for the same arguments */
static unsigned int path_len(int is_dir, const char *basepath,
                             struct path_prefix *prefix, const char *path)
{
        unsigned int len = path_prefix_len(prefix) + strlen(path);

        if (basepath)
                len += strlen(basepath) + 1;    /* plus the separating slash */
        return is_dir ? len + 1 : len;          /* plus the trailing slash */
}
171
/*
 * Writes the leading "<size> <keyword>=" part of an extended header
 * record to *p and advances *p past it.
 */
static void append_extended_header_prefix(char **p, unsigned int size,
                                          const char *keyword)
{
        *p += sprintf(*p, "%u %s=", size, keyword);
}
178
/*
 * Returns the total length of the extended header record
 * "<len> <keyword>=<value>\n" for a value of valuelen bytes.
 *
 * The length field counts its own decimal digits, so the digit count is
 * raised until the total fits into that many digits.  This works for
 * records of any size, not only those below 1000 bytes.
 */
static unsigned int extended_header_len(const char *keyword,
                                        unsigned int valuelen)
{
        /* everything except the digits: ' ' keyword '=' value '\n' */
        unsigned int len = 1 + strlen(keyword) + 1 + valuelen + 1;
        unsigned int digits = 1;
        unsigned int limit = 10;        /* smallest number needing digits + 1 digits */

        /* an unsigned int is assumed to have at most 10 decimal digits */
        while (digits < 10 && len + digits >= limit) {
                digits++;
                limit *= 10;
        }
        return len + digits;
}
190
/*
 * Appends one complete extended header record,
 * "<size> <keyword>=<value>\n", to *p; value is len bytes long and need
 * not be nul-terminated.
 */
static void append_extended_header(char **p, const char *keyword,
                                   const char *value, unsigned int len)
{
        append_extended_header_prefix(p, extended_header_len(keyword, len),
                                      keyword);
        *p = (char *)memcpy(*p, value, len) + len;
        append_char(p, '\n');
}
200
/* declared ahead of its definition: the extended header writers call it */
static void write_header(const char *sha1, char typeflag,
                         const char *basepath, struct path_prefix *prefix,
                         const char *path, unsigned int mode,
                         void *buffer, unsigned long size);
203
204 /* stores a pax extended header directly in the block buffer */
205 static void write_extended_header(const char *headerfilename, int is_dir,
206                                   unsigned int flags, const char *basepath,
207                                   struct path_prefix *prefix,
208                                   const char *path, unsigned int namelen,
209                                   void *content, unsigned int contentsize)
210 {
211         char *p;
212         unsigned int pathlen, size;
213
214         size = pathlen = extended_header_len("path", namelen);
215         if (size > RECORDSIZE)
216                 die("tar-tree: extended header too big, wtf?");
217         write_header(NULL, TYPEFLAG_EXT_HEADER, NULL, NULL, headerfilename,
218                      0100600, NULL, size);
219
220         p = get_record();
221         append_extended_header_prefix(&p, pathlen, "path");
222         append_path(&p, is_dir, basepath, prefix, path);
223         append_char(&p, '\n');
224         write_if_needed();
225 }
226
227 static void write_global_extended_header(const char *sha1)
228 {
229         char *p;
230         unsigned int size;
231
232         size = extended_header_len("comment", 40);
233         write_header(NULL, TYPEFLAG_GLOBAL_HEADER, NULL, NULL,
234                      "pax_global_header", 0100600, NULL, size);
235
236         p = get_record();
237         append_extended_header(&p, "comment", sha1_to_hex(sha1), 40);
238         write_if_needed();
239 }
240
241 /* stores a ustar header directly in the block buffer */
242 static void write_header(const char *sha1, char typeflag, const char *basepath,
243                          struct path_prefix *prefix, const char *path,
244                          unsigned int mode, void *buffer, unsigned long size)
245 {
246         unsigned int namelen; 
247         char *p, *header = NULL;
248         unsigned int checksum = 0;
249         int i;
250
251         if (typeflag == TYPEFLAG_AUTO) {
252                 if (S_ISDIR(mode))
253                         typeflag = TYPEFLAG_DIR;
254                 else
255                         typeflag = TYPEFLAG_REG;
256         }
257
258         namelen = path_len(S_ISDIR(mode), basepath, prefix, path);
259         if (namelen > 500) {
260                 die("tar-tree: name too log of object %s\n", sha1_to_hex(sha1));
261         } else if (namelen > 100) {
262                 char *sha1_hex = sha1_to_hex(sha1);
263                 char headerfilename[51];
264                 sprintf(headerfilename, "%s.paxheader", sha1_hex);
265                 /* the extended header must be written before the normal one */
266                 write_extended_header(headerfilename, S_ISDIR(mode),
267                                       0, basepath, prefix, path,
268                                       namelen, buffer, size);
269
270                 header = get_record();
271                 sprintf(header, "%s.data", sha1_hex);
272         } else {
273                 p = header = get_record();
274                 append_path(&p, S_ISDIR(mode), basepath, prefix, path);
275         }
276
277         if (S_ISDIR(mode))
278                 mode |= 0755;   /* GIT doesn't store permissions of dirs */
279         sprintf(&header[100], "%07o", mode & 07777);
280
281         /* XXX: should we provide more meaningful info here? */
282         sprintf(&header[108], "%07o", 0);       /* uid */
283         sprintf(&header[116], "%07o", 0);       /* gid */
284         strncpy(&header[265], "git", 31);       /* uname */
285         strncpy(&header[297], "git", 31);       /* gname */
286
287         sprintf(&header[124], "%011lo", S_ISDIR(mode) ? 0 : size);
288         sprintf(&header[136], "%011lo", archive_time);
289
290         header[156] = typeflag;
291
292         memcpy(&header[257], "ustar", 6);
293         memcpy(&header[263], "00", 2);
294
295         printf(&header[329], "%07o", 0);        /* devmajor */
296         printf(&header[337], "%07o", 0);        /* devminor */
297
298         memset(&header[148], ' ', 8);
299         for (i = 0; i < RECORDSIZE; i++)
300                 checksum += header[i];
301         sprintf(&header[148], "%07o", checksum & 0x1fffff);
302
303         write_if_needed();
304 }
305
306 static void traverse_tree(void *buffer, unsigned long size,
307                           struct path_prefix *prefix)
308 {
309         struct path_prefix this_prefix;
310         this_prefix.prev = prefix;
311
312         while (size) {
313                 int namelen = strlen(buffer)+1;
314                 void *eltbuf;
315                 char elttype[20];
316                 unsigned long eltsize;
317                 unsigned char *sha1 = buffer + namelen;
318                 char *path = strchr(buffer, ' ') + 1;
319                 unsigned int mode;
320
321                 if (size < namelen + 20 || sscanf(buffer, "%o", &mode) != 1)
322                         die("corrupt 'tree' file");
323                 buffer = sha1 + 20;
324                 size -= namelen + 20;
325
326                 eltbuf = read_sha1_file(sha1, elttype, &eltsize);
327                 if (!eltbuf)
328                         die("cannot read %s", sha1_to_hex(sha1));
329                 write_header(sha1, TYPEFLAG_AUTO, basedir, prefix, path,
330                              mode, eltbuf, eltsize);
331                 if (!strcmp(elttype, "tree")) {
332                         this_prefix.name = path;
333                         traverse_tree(eltbuf, eltsize, &this_prefix);
334                 } else if (!strcmp(elttype, "blob")) {
335                         write_blocked(eltbuf, eltsize);
336                 }
337                 free(eltbuf);
338         }
339 }
340
/*
 * Gets the commit time from the committer line of a commit object.
 *
 * Scans the first size bytes of buffer line by line, stopping at the
 * first empty line or at a line without a terminating newline.  On the
 * first line starting with "committer " the decimal number after the last
 * '>' is parsed; it must be followed by a space.
 *
 * Returns that number, or 0 if no valid committer time was found.
 * The newline of every scanned line is overwritten with a nul byte.
 */
time_t commit_time(void * buffer, unsigned long size)
{
        time_t result = 0;
        char *p = buffer;

        while (size > 0) {
                char *endp = memchr(p, '\n', size);
                if (!endp || endp == p)
                        break;
                *endp = '\0';
                if (endp - p > 10 && !memcmp(p, "committer ", 10)) {
                        char *nump = strrchr(p, '>');
                        if (!nump)
                                break;
                        nump++;
                        result = strtoul(nump, &endp, 10);
                        if (*endp != ' ')
                                result = 0;
                        break;
                }
                /* consume the line together with its newline */
                size -= endp - p + 1;
                p = endp + 1;
        }
        return result;
}
367
368 int main(int argc, char **argv)
369 {
370         unsigned char sha1[20];
371         unsigned char commit_sha1[20];
372         void *buffer;
373         unsigned long size;
374
375         switch (argc) {
376         case 3:
377                 basedir = argv[2];
378                 /* FALLTHROUGH */
379         case 2:
380                 if (get_sha1(argv[1], sha1) < 0)
381                         usage(tar_tree_usage);
382                 break;
383         default:
384                 usage(tar_tree_usage);
385         }
386
387         sha1_file_directory = getenv(DB_ENVIRONMENT);
388         if (!sha1_file_directory)
389                 sha1_file_directory = DEFAULT_DB_ENVIRONMENT;
390
391         buffer = read_object_with_reference(sha1, "commit", &size, commit_sha1);
392         if (buffer) {
393                 write_global_extended_header(commit_sha1);
394                 archive_time = commit_time(buffer, size);
395                 free(buffer);
396         }
397         buffer = read_object_with_reference(sha1, "tree", &size, NULL);
398         if (!buffer)
399                 die("not a reference to a tag, commit or tree object: %s",
400                     sha1_to_hex(sha1));
401         if (!archive_time)
402                 archive_time = time(NULL);
403         if (basedir)
404                 write_header("0", TYPEFLAG_DIR, NULL, NULL, basedir, 040755,
405                              NULL, 0);
406         traverse_tree(buffer, size, NULL);
407         free(buffer);
408         write_trailer();
409         return 0;
410 }