Document git-clone --use-separate-remote
[git.git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
#ifndef NO_EXPAT
#include <expat.h>

/* Definitions for DAV requests */

/* HTTP method name passed as CURLOPT_CUSTOMREQUEST by remote_ls(). */
#define DAV_PROPFIND "PROPFIND"
/*
 * Element paths in the form built up in xml_ctx.name by xml_start_tag():
 * one ".name" component per open element, namespace prefix removed.
 */
#define DAV_PROPFIND_RESP ".multistatus.response"
#define DAV_PROPFIND_NAME ".multistatus.response.href"
#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
/* Request body uploaded with every PROPFIND issued by remote_ls(). */
#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"

/* Definitions for processing XML DAV responses */

/*
 * Fallback used when the included expat header does not define
 * XML_STATUS_OK as a macro.
 * NOTE(review): presumably targets older expat releases -- confirm.
 */
#ifndef XML_STATUS_OK
enum XML_Status {
  XML_STATUS_OK = 1,
  XML_STATUS_ERROR = 0
};
#define XML_STATUS_OK    1
#define XML_STATUS_ERROR 0
#endif

/* Flags that control remote_ls processing */
#define PROCESS_FILES (1u << 0) /* call userFunc for non-directory entries */
#define PROCESS_DIRS  (1u << 1) /* call userFunc for directory entries */
#define RECURSIVE     (1u << 2) /* descend into listed directories */

/* Flags that remote_ls passes to callback functions */
#define IS_DIR (1u << 0) /* set in dentry_flags when a collection element was seen */
#endif
35
/* Chunk size used when replaying a leftover temp file in start_object_request(). */
#define PREV_BUF_SIZE 4096
/* Size of the stack buffer that holds a "Range: bytes=N-" header line. */
#define RANGE_HEADER_SIZE 30

/*
 * State of the alternates list: -1 = not fetched yet (or the fetch failed),
 * 0 = fetch in progress, 1 = fetched and processed.
 */
static int got_alternates = -1;
/* Not referenced in the part of the file shown here. */
static int corrupt_object_found = 0;

/*
 * Header list installed with CURLOPT_HTTPHEADER on object and pack index
 * requests; it is populated outside the part of the file shown here.
 */
static struct curl_slist *no_pragma_header;

/* One repository (the primary one or an alternate) objects can be fetched from. */
struct alt_base
{
        char *base;               /* base URL of the repository */
        int path_len;             /* strlen() of the path part of base that follows the host name */
        int got_indices;          /* set to 0 for new alternates; consumers are outside this view */
        struct packed_git *packs; /* packs whose index has been fetched (see setup_index) */
        struct alt_base *next;    /* next alternate, NULL at the end of the list */
};

/* Head of the repository list; new alternates are appended at the tail. */
static struct alt_base *alt = NULL;
54
/* Lifecycle of an object_request. */
enum object_request_state {
        WAITING,        /* queued by prefetch(), transfer not started */
        ABORTED,        /* temp file could not be created or the slot failed to start */
        ACTIVE,         /* transfer handed to an active request slot */
        COMPLETE,       /* response processed, or the object was already present */
};
61
/* Book-keeping for the download of a single loose object. */
struct object_request
{
        unsigned char sha1[20];            /* name of the object being requested */
        struct alt_base *repo;             /* repository currently being tried */
        char *url;                         /* heap copy of the request URL; freed on release */
        char filename[PATH_MAX];           /* final path of the object file */
        char tmpfile[PATH_MAX];            /* "<filename>.temp", the download target */
        int local;                         /* fd open on tmpfile, -1 when closed */
        enum object_request_state state;
        CURLcode curl_result;              /* copied from the slot when the response arrives */
        char errorstr[CURL_ERROR_SIZE];    /* installed as CURLOPT_ERRORBUFFER */
        long http_code;                    /* copied from the slot when the response arrives */
        unsigned char real_sha1[20];       /* SHA-1 computed over the inflated data */
        SHA_CTX c;                         /* running SHA-1 context */
        z_stream stream;                   /* zlib inflate state */
        int zret;                          /* most recent inflate() return value */
        int rename;                        /* result of move_temp_to_file() */
        struct active_request_slot *slot;  /* slot carrying the transfer, NULL otherwise */
        struct object_request *next;       /* next entry in the request queue */
};
82
/* Context handed to process_alternates_response() through the slot callback. */
struct alternates_request {
        char *base;                        /* base URL the alternates are relative to */
        char *url;                         /* request URL buffer; rewritten for the fallback request */
        struct buffer *buffer;             /* receives the response body */
        struct active_request_slot *slot;  /* slot used (and reused) for the request */
        int http_specific;                 /* 1 while asking for http-alternates, 0 for plain alternates */
};
90
#ifndef NO_EXPAT
/* State shared by the expat callbacks while a DAV response is parsed. */
struct xml_ctx
{
        char *name;     /* dotted path of the currently open elements */
        int len;        /* number of bytes allocated for name */
        char *cdata;    /* most recent character data, reset on every start tag */
        /* called with tag_closed == 0 on element start and 1 on element end */
        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
        void *userData; /* opaque pointer for userFunc */
};

/* State of one remote_ls() directory listing. */
struct remote_ls_ctx
{
        struct alt_base *repo;  /* repository being listed */
        char *path;             /* private copy of the listed path */
        /* invoked for each entry selected by flags */
        void (*userFunc)(struct remote_ls_ctx *ls);
        void *userData;         /* opaque pointer for userFunc */
        int flags;              /* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
        char *dentry_name;      /* name of the entry currently being parsed */
        int dentry_flags;       /* IS_DIR when the entry is a collection */
        int rc;                 /* result returned by remote_ls() */
        struct remote_ls_ctx *parent; /* not assigned in the part of the file shown here */
};
#endif
114
/* Head of the singly linked queue of object requests; prefetch() appends at the tail. */
static struct object_request *object_queue_head = NULL;
116
117 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
118                                void *data)
119 {
120         unsigned char expn[4096];
121         size_t size = eltsize * nmemb;
122         int posn = 0;
123         struct object_request *obj_req = (struct object_request *)data;
124         do {
125                 ssize_t retval = write(obj_req->local,
126                                        ptr + posn, size - posn);
127                 if (retval < 0)
128                         return posn;
129                 posn += retval;
130         } while (posn < size);
131
132         obj_req->stream.avail_in = size;
133         obj_req->stream.next_in = ptr;
134         do {
135                 obj_req->stream.next_out = expn;
136                 obj_req->stream.avail_out = sizeof(expn);
137                 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
138                 SHA1_Update(&obj_req->c, expn,
139                             sizeof(expn) - obj_req->stream.avail_out);
140         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
141         data_received++;
142         return size;
143 }
144
/* Defined further down; process_object_response() needs it on a 404. */
static void fetch_alternates(char *base);

/* Slot callback installed by start_object_request(). */
static void process_object_response(void *callback_data);
148
149 static void start_object_request(struct object_request *obj_req)
150 {
151         char *hex = sha1_to_hex(obj_req->sha1);
152         char prevfile[PATH_MAX];
153         char *url;
154         char *posn;
155         int prevlocal;
156         unsigned char prev_buf[PREV_BUF_SIZE];
157         ssize_t prev_read = 0;
158         long prev_posn = 0;
159         char range[RANGE_HEADER_SIZE];
160         struct curl_slist *range_header = NULL;
161         struct active_request_slot *slot;
162
163         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
164         unlink(prevfile);
165         rename(obj_req->tmpfile, prevfile);
166         unlink(obj_req->tmpfile);
167
168         if (obj_req->local != -1)
169                 error("fd leakage in start: %d", obj_req->local);
170         obj_req->local = open(obj_req->tmpfile,
171                               O_WRONLY | O_CREAT | O_EXCL, 0666);
172         /* This could have failed due to the "lazy directory creation";
173          * try to mkdir the last path component.
174          */
175         if (obj_req->local < 0 && errno == ENOENT) {
176                 char *dir = strrchr(obj_req->tmpfile, '/');
177                 if (dir) {
178                         *dir = 0;
179                         mkdir(obj_req->tmpfile, 0777);
180                         *dir = '/';
181                 }
182                 obj_req->local = open(obj_req->tmpfile,
183                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
184         }
185
186         if (obj_req->local < 0) {
187                 obj_req->state = ABORTED;
188                 error("Couldn't create temporary file %s for %s: %s",
189                       obj_req->tmpfile, obj_req->filename, strerror(errno));
190                 return;
191         }
192
193         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
194
195         inflateInit(&obj_req->stream);
196
197         SHA1_Init(&obj_req->c);
198
199         url = xmalloc(strlen(obj_req->repo->base) + 50);
200         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
201         strcpy(url, obj_req->repo->base);
202         posn = url + strlen(obj_req->repo->base);
203         strcpy(posn, "objects/");
204         posn += 8;
205         memcpy(posn, hex, 2);
206         posn += 2;
207         *(posn++) = '/';
208         strcpy(posn, hex + 2);
209         strcpy(obj_req->url, url);
210
211         /* If a previous temp file is present, process what was already
212            fetched. */
213         prevlocal = open(prevfile, O_RDONLY);
214         if (prevlocal != -1) {
215                 do {
216                         prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
217                         if (prev_read>0) {
218                                 if (fwrite_sha1_file(prev_buf,
219                                                      1,
220                                                      prev_read,
221                                                      obj_req) == prev_read) {
222                                         prev_posn += prev_read;
223                                 } else {
224                                         prev_read = -1;
225                                 }
226                         }
227                 } while (prev_read > 0);
228                 close(prevlocal);
229         }
230         unlink(prevfile);
231
232         /* Reset inflate/SHA1 if there was an error reading the previous temp
233            file; also rewind to the beginning of the local file. */
234         if (prev_read == -1) {
235                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
236                 inflateInit(&obj_req->stream);
237                 SHA1_Init(&obj_req->c);
238                 if (prev_posn>0) {
239                         prev_posn = 0;
240                         lseek(obj_req->local, SEEK_SET, 0);
241                         ftruncate(obj_req->local, 0);
242                 }
243         }
244
245         slot = get_active_slot();
246         slot->callback_func = process_object_response;
247         slot->callback_data = obj_req;
248         obj_req->slot = slot;
249
250         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
251         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
252         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
253         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
254         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
255
256         /* If we have successfully processed data from a previous fetch
257            attempt, only fetch the data we don't already have. */
258         if (prev_posn>0) {
259                 if (get_verbosely)
260                         fprintf(stderr,
261                                 "Resuming fetch of object %s at byte %ld\n",
262                                 hex, prev_posn);
263                 sprintf(range, "Range: bytes=%ld-", prev_posn);
264                 range_header = curl_slist_append(range_header, range);
265                 curl_easy_setopt(slot->curl,
266                                  CURLOPT_HTTPHEADER, range_header);
267         }
268
269         /* Try to get the request started, abort the request on error */
270         obj_req->state = ACTIVE;
271         if (!start_active_slot(slot)) {
272                 obj_req->state = ABORTED;
273                 obj_req->slot = NULL;
274                 close(obj_req->local); obj_req->local = -1;
275                 free(obj_req->url);
276                 return;
277         }
278 }
279
280 static void finish_object_request(struct object_request *obj_req)
281 {
282         struct stat st;
283
284         fchmod(obj_req->local, 0444);
285         close(obj_req->local); obj_req->local = -1;
286
287         if (obj_req->http_code == 416) {
288                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
289         } else if (obj_req->curl_result != CURLE_OK) {
290                 if (stat(obj_req->tmpfile, &st) == 0)
291                         if (st.st_size == 0)
292                                 unlink(obj_req->tmpfile);
293                 return;
294         }
295
296         inflateEnd(&obj_req->stream);
297         SHA1_Final(obj_req->real_sha1, &obj_req->c);
298         if (obj_req->zret != Z_STREAM_END) {
299                 unlink(obj_req->tmpfile);
300                 return;
301         }
302         if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
303                 unlink(obj_req->tmpfile);
304                 return;
305         }
306         obj_req->rename =
307                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
308
309         if (obj_req->rename == 0)
310                 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
311 }
312
313 static void process_object_response(void *callback_data)
314 {
315         struct object_request *obj_req =
316                 (struct object_request *)callback_data;
317
318         obj_req->curl_result = obj_req->slot->curl_result;
319         obj_req->http_code = obj_req->slot->http_code;
320         obj_req->slot = NULL;
321         obj_req->state = COMPLETE;
322
323         /* Use alternates if necessary */
324         if (obj_req->http_code == 404 ||
325             obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
326                 fetch_alternates(alt->base);
327                 if (obj_req->repo->next != NULL) {
328                         obj_req->repo =
329                                 obj_req->repo->next;
330                         close(obj_req->local);
331                         obj_req->local = -1;
332                         start_object_request(obj_req);
333                         return;
334                 }
335         }
336
337         finish_object_request(obj_req);
338 }
339
340 static void release_object_request(struct object_request *obj_req)
341 {
342         struct object_request *entry = object_queue_head;
343
344         if (obj_req->local != -1)
345                 error("fd leakage in release: %d", obj_req->local);
346         if (obj_req == object_queue_head) {
347                 object_queue_head = obj_req->next;
348         } else {
349                 while (entry->next != NULL && entry->next != obj_req)
350                         entry = entry->next;
351                 if (entry->next == obj_req)
352                         entry->next = entry->next->next;
353         }
354
355         free(obj_req->url);
356         free(obj_req);
357 }
358
#ifdef USE_CURL_MULTI
/*
 * Start queued object requests while request capacity is available, then
 * release the curl handles of slots that are not in use.
 */
void fill_active_slots(void)
{
        struct object_request *req;
        /* captured before any request is started, like the queue walk below expects */
        struct active_request_slot *idle = active_queue_head;
        int running;

        for (req = object_queue_head;
             active_requests < max_requests && req != NULL;
             req = req->next) {
                if (req->state != WAITING)
                        continue;

                /* the object may have arrived by other means meanwhile */
                if (has_sha1_file(req->sha1))
                        req->state = COMPLETE;
                else
                        start_object_request(req);
                curl_multi_perform(curlm, &running);
        }

        for (; idle != NULL; idle = idle->next) {
                if (idle->in_use || idle->curl == NULL)
                        continue;
                curl_easy_cleanup(idle->curl);
                idle->curl = NULL;
        }
}
#endif
386
387 void prefetch(unsigned char *sha1)
388 {
389         struct object_request *newreq;
390         struct object_request *tail;
391         char *filename = sha1_file_name(sha1);
392
393         newreq = xmalloc(sizeof(*newreq));
394         memcpy(newreq->sha1, sha1, 20);
395         newreq->repo = alt;
396         newreq->url = NULL;
397         newreq->local = -1;
398         newreq->state = WAITING;
399         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
400         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
401                  "%s.temp", filename);
402         newreq->slot = NULL;
403         newreq->next = NULL;
404
405         if (object_queue_head == NULL) {
406                 object_queue_head = newreq;
407         } else {
408                 tail = object_queue_head;
409                 while (tail->next != NULL) {
410                         tail = tail->next;
411                 }
412                 tail->next = newreq;
413         }
414
415 #ifdef USE_CURL_MULTI
416         fill_active_slots();
417         step_active_slots();
418 #endif
419 }
420
421 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
422 {
423         char *hex = sha1_to_hex(sha1);
424         char *filename;
425         char *url;
426         char tmpfile[PATH_MAX];
427         long prev_posn = 0;
428         char range[RANGE_HEADER_SIZE];
429         struct curl_slist *range_header = NULL;
430
431         FILE *indexfile;
432         struct active_request_slot *slot;
433         struct slot_results results;
434
435         if (has_pack_index(sha1))
436                 return 0;
437
438         if (get_verbosely)
439                 fprintf(stderr, "Getting index for pack %s\n", hex);
440
441         url = xmalloc(strlen(repo->base) + 64);
442         sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
443
444         filename = sha1_pack_index_name(sha1);
445         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
446         indexfile = fopen(tmpfile, "a");
447         if (!indexfile)
448                 return error("Unable to open local file %s for pack index",
449                              filename);
450
451         slot = get_active_slot();
452         slot->results = &results;
453         curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
454         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
455         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
456         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
457         slot->local = indexfile;
458
459         /* If there is data present from a previous transfer attempt,
460            resume where it left off */
461         prev_posn = ftell(indexfile);
462         if (prev_posn>0) {
463                 if (get_verbosely)
464                         fprintf(stderr,
465                                 "Resuming fetch of index for pack %s at byte %ld\n",
466                                 hex, prev_posn);
467                 sprintf(range, "Range: bytes=%ld-", prev_posn);
468                 range_header = curl_slist_append(range_header, range);
469                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
470         }
471
472         if (start_active_slot(slot)) {
473                 run_active_slot(slot);
474                 if (results.curl_result != CURLE_OK) {
475                         fclose(indexfile);
476                         return error("Unable to get pack index %s\n%s", url,
477                                      curl_errorstr);
478                 }
479         } else {
480                 fclose(indexfile);
481                 return error("Unable to start request");
482         }
483
484         fclose(indexfile);
485
486         return move_temp_to_file(tmpfile, filename);
487 }
488
489 static int setup_index(struct alt_base *repo, unsigned char *sha1)
490 {
491         struct packed_git *new_pack;
492         if (has_pack_file(sha1))
493                 return 0; // don't list this as something we can get
494
495         if (fetch_index(repo, sha1))
496                 return -1;
497
498         new_pack = parse_pack_index(sha1);
499         new_pack->next = repo->packs;
500         repo->packs = new_pack;
501         return 0;
502 }
503
504 static void process_alternates_response(void *callback_data)
505 {
506         struct alternates_request *alt_req =
507                 (struct alternates_request *)callback_data;
508         struct active_request_slot *slot = alt_req->slot;
509         struct alt_base *tail = alt;
510         char *base = alt_req->base;
511         static const char null_byte = '\0';
512         char *data;
513         int i = 0;
514
515         if (alt_req->http_specific) {
516                 if (slot->curl_result != CURLE_OK ||
517                     !alt_req->buffer->posn) {
518
519                         /* Try reusing the slot to get non-http alternates */
520                         alt_req->http_specific = 0;
521                         sprintf(alt_req->url, "%s/objects/info/alternates",
522                                 base);
523                         curl_easy_setopt(slot->curl, CURLOPT_URL,
524                                          alt_req->url);
525                         active_requests++;
526                         slot->in_use = 1;
527                         if (slot->finished != NULL)
528                                 (*slot->finished) = 0;
529                         if (!start_active_slot(slot)) {
530                                 got_alternates = -1;
531                                 slot->in_use = 0;
532                                 if (slot->finished != NULL)
533                                         (*slot->finished) = 1;
534                         }
535                         return;
536                 }
537         } else if (slot->curl_result != CURLE_OK) {
538                 if (slot->http_code != 404 &&
539                     slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
540                         got_alternates = -1;
541                         return;
542                 }
543         }
544
545         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
546         alt_req->buffer->posn--;
547         data = alt_req->buffer->buffer;
548
549         while (i < alt_req->buffer->posn) {
550                 int posn = i;
551                 while (posn < alt_req->buffer->posn && data[posn] != '\n')
552                         posn++;
553                 if (data[posn] == '\n') {
554                         int okay = 0;
555                         int serverlen = 0;
556                         struct alt_base *newalt;
557                         char *target = NULL;
558                         char *path;
559                         if (data[i] == '/') {
560                                 serverlen = strchr(base + 8, '/') - base;
561                                 okay = 1;
562                         } else if (!memcmp(data + i, "../", 3)) {
563                                 i += 3;
564                                 serverlen = strlen(base);
565                                 while (i + 2 < posn &&
566                                        !memcmp(data + i, "../", 3)) {
567                                         do {
568                                                 serverlen--;
569                                         } while (serverlen &&
570                                                  base[serverlen - 1] != '/');
571                                         i += 3;
572                                 }
573                                 // If the server got removed, give up.
574                                 okay = strchr(base, ':') - base + 3 <
575                                         serverlen;
576                         } else if (alt_req->http_specific) {
577                                 char *colon = strchr(data + i, ':');
578                                 char *slash = strchr(data + i, '/');
579                                 if (colon && slash && colon < data + posn &&
580                                     slash < data + posn && colon < slash) {
581                                         okay = 1;
582                                 }
583                         }
584                         // skip 'objects' at end
585                         if (okay) {
586                                 target = xmalloc(serverlen + posn - i - 6);
587                                 strncpy(target, base, serverlen);
588                                 strncpy(target + serverlen, data + i,
589                                         posn - i - 7);
590                                 target[serverlen + posn - i - 7] = '\0';
591                                 if (get_verbosely)
592                                         fprintf(stderr,
593                                                 "Also look at %s\n", target);
594                                 newalt = xmalloc(sizeof(*newalt));
595                                 newalt->next = NULL;
596                                 newalt->base = target;
597                                 newalt->got_indices = 0;
598                                 newalt->packs = NULL;
599                                 path = strstr(target, "//");
600                                 if (path) {
601                                         path = strchr(path+2, '/');
602                                         if (path)
603                                                 newalt->path_len = strlen(path);
604                                 }
605
606                                 while (tail->next != NULL)
607                                         tail = tail->next;
608                                 tail->next = newalt;
609                         }
610                 }
611                 i = posn + 1;
612         }
613
614         got_alternates = 1;
615 }
616
617 static void fetch_alternates(char *base)
618 {
619         struct buffer buffer;
620         char *url;
621         char *data;
622         struct active_request_slot *slot;
623         struct alternates_request alt_req;
624
625         /* If another request has already started fetching alternates,
626            wait for them to arrive and return to processing this request's
627            curl message */
628 #ifdef USE_CURL_MULTI
629         while (got_alternates == 0) {
630                 step_active_slots();
631         }
632 #endif
633
634         /* Nothing to do if they've already been fetched */
635         if (got_alternates == 1)
636                 return;
637
638         /* Start the fetch */
639         got_alternates = 0;
640
641         data = xmalloc(4096);
642         buffer.size = 4096;
643         buffer.posn = 0;
644         buffer.buffer = data;
645
646         if (get_verbosely)
647                 fprintf(stderr, "Getting alternates list for %s\n", base);
648
649         url = xmalloc(strlen(base) + 31);
650         sprintf(url, "%s/objects/info/http-alternates", base);
651
652         /* Use a callback to process the result, since another request
653            may fail and need to have alternates loaded before continuing */
654         slot = get_active_slot();
655         slot->callback_func = process_alternates_response;
656         slot->callback_data = &alt_req;
657
658         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
659         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
660         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
661
662         alt_req.base = base;
663         alt_req.url = url;
664         alt_req.buffer = &buffer;
665         alt_req.http_specific = 1;
666         alt_req.slot = slot;
667
668         if (start_active_slot(slot))
669                 run_active_slot(slot);
670         else
671                 got_alternates = -1;
672
673         free(data);
674         free(url);
675 }
676
677 #ifndef NO_EXPAT
678 static void
679 xml_start_tag(void *userData, const char *name, const char **atts)
680 {
681         struct xml_ctx *ctx = (struct xml_ctx *)userData;
682         const char *c = strchr(name, ':');
683         int new_len;
684
685         if (c == NULL)
686                 c = name;
687         else
688                 c++;
689
690         new_len = strlen(ctx->name) + strlen(c) + 2;
691
692         if (new_len > ctx->len) {
693                 ctx->name = xrealloc(ctx->name, new_len);
694                 ctx->len = new_len;
695         }
696         strcat(ctx->name, ".");
697         strcat(ctx->name, c);
698
699         if (ctx->cdata) {
700                 free(ctx->cdata);
701                 ctx->cdata = NULL;
702         }
703
704         ctx->userFunc(ctx, 0);
705 }
706
707 static void
708 xml_end_tag(void *userData, const char *name)
709 {
710         struct xml_ctx *ctx = (struct xml_ctx *)userData;
711         const char *c = strchr(name, ':');
712         char *ep;
713
714         ctx->userFunc(ctx, 1);
715
716         if (c == NULL)
717                 c = name;
718         else
719                 c++;
720
721         ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
722         *ep = 0;
723 }
724
725 static void
726 xml_cdata(void *userData, const XML_Char *s, int len)
727 {
728         struct xml_ctx *ctx = (struct xml_ctx *)userData;
729         if (ctx->cdata)
730                 free(ctx->cdata);
731         ctx->cdata = xcalloc(len+1, 1);
732         strncpy(ctx->cdata, s, len);
733 }
734
/* Declared ahead of its definition because handle_remote_ls_ctx() recurses into it. */
static int remote_ls(struct alt_base *repo, const char *path, int flags,
                     void (*userFunc)(struct remote_ls_ctx *ls),
                     void *userData);
738
739 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
740 {
741         struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
742
743         if (tag_closed) {
744                 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
745                         if (ls->dentry_flags & IS_DIR) {
746                                 if (ls->flags & PROCESS_DIRS) {
747                                         ls->userFunc(ls);
748                                 }
749                                 if (strcmp(ls->dentry_name, ls->path) &&
750                                     ls->flags & RECURSIVE) {
751                                         ls->rc = remote_ls(ls->repo,
752                                                            ls->dentry_name,
753                                                            ls->flags,
754                                                            ls->userFunc,
755                                                            ls->userData);
756                                 }
757                         } else if (ls->flags & PROCESS_FILES) {
758                                 ls->userFunc(ls);
759                         }
760                 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
761                         ls->dentry_name = xmalloc(strlen(ctx->cdata) -
762                                                   ls->repo->path_len + 1);
763                         strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
764                 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
765                         ls->dentry_flags |= IS_DIR;
766                 }
767         } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
768                 if (ls->dentry_name) {
769                         free(ls->dentry_name);
770                 }
771                 ls->dentry_name = NULL;
772                 ls->dentry_flags = 0;
773         }
774 }
775
776 static int remote_ls(struct alt_base *repo, const char *path, int flags,
777                      void (*userFunc)(struct remote_ls_ctx *ls),
778                      void *userData)
779 {
780         char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
781         struct active_request_slot *slot;
782         struct slot_results results;
783         struct buffer in_buffer;
784         struct buffer out_buffer;
785         char *in_data;
786         char *out_data;
787         XML_Parser parser = XML_ParserCreate(NULL);
788         enum XML_Status result;
789         struct curl_slist *dav_headers = NULL;
790         struct xml_ctx ctx;
791         struct remote_ls_ctx ls;
792
793         ls.flags = flags;
794         ls.repo = repo;
795         ls.path = strdup(path);
796         ls.dentry_name = NULL;
797         ls.dentry_flags = 0;
798         ls.userData = userData;
799         ls.userFunc = userFunc;
800         ls.rc = 0;
801
802         sprintf(url, "%s%s", repo->base, path);
803
804         out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
805         out_data = xmalloc(out_buffer.size + 1);
806         snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
807         out_buffer.posn = 0;
808         out_buffer.buffer = out_data;
809
810         in_buffer.size = 4096;
811         in_data = xmalloc(in_buffer.size);
812         in_buffer.posn = 0;
813         in_buffer.buffer = in_data;
814
815         dav_headers = curl_slist_append(dav_headers, "Depth: 1");
816         dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
817
818         slot = get_active_slot();
819         slot->results = &results;
820         curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
821         curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
822         curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
823         curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
824         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
825         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
826         curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
827         curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
828         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
829
830         if (start_active_slot(slot)) {
831                 run_active_slot(slot);
832                 if (results.curl_result == CURLE_OK) {
833                         ctx.name = xcalloc(10, 1);
834                         ctx.len = 0;
835                         ctx.cdata = NULL;
836                         ctx.userFunc = handle_remote_ls_ctx;
837                         ctx.userData = &ls;
838                         XML_SetUserData(parser, &ctx);
839                         XML_SetElementHandler(parser, xml_start_tag,
840                                               xml_end_tag);
841                         XML_SetCharacterDataHandler(parser, xml_cdata);
842                         result = XML_Parse(parser, in_buffer.buffer,
843                                            in_buffer.posn, 1);
844                         free(ctx.name);
845
846                         if (result != XML_STATUS_OK) {
847                                 ls.rc = error("XML error: %s",
848                                               XML_ErrorString(
849                                                       XML_GetErrorCode(parser)));
850                         }
851                 } else {
852                         ls.rc = -1;
853                 }
854         } else {
855                 ls.rc = error("Unable to start PROPFIND request");
856         }
857
858         free(ls.path);
859         free(url);
860         free(out_data);
861         free(in_buffer.buffer);
862         curl_slist_free_all(dav_headers);
863
864         return ls.rc;
865 }
866
867 static void process_ls_pack(struct remote_ls_ctx *ls)
868 {
869         unsigned char sha1[20];
870
871         if (strlen(ls->dentry_name) == 63 &&
872             !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
873             !strncmp(ls->dentry_name+58, ".pack", 5)) {
874                 get_sha1_hex(ls->dentry_name + 18, sha1);
875                 setup_index(ls->repo, sha1);
876         }
877 }
878 #endif
879
880 static int fetch_indices(struct alt_base *repo)
881 {
882         unsigned char sha1[20];
883         char *url;
884         struct buffer buffer;
885         char *data;
886         int i = 0;
887
888         struct active_request_slot *slot;
889         struct slot_results results;
890
891         if (repo->got_indices)
892                 return 0;
893
894         data = xmalloc(4096);
895         buffer.size = 4096;
896         buffer.posn = 0;
897         buffer.buffer = data;
898
899         if (get_verbosely)
900                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
901
902 #ifndef NO_EXPAT
903         if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
904                       process_ls_pack, NULL) == 0)
905                 return 0;
906 #endif
907
908         url = xmalloc(strlen(repo->base) + 21);
909         sprintf(url, "%s/objects/info/packs", repo->base);
910
911         slot = get_active_slot();
912         slot->results = &results;
913         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
914         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
915         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
916         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
917         if (start_active_slot(slot)) {
918                 run_active_slot(slot);
919                 if (results.curl_result != CURLE_OK) {
920                         if (results.http_code == 404 ||
921                             results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
922                                 repo->got_indices = 1;
923                                 free(buffer.buffer);
924                                 return 0;
925                         } else {
926                                 repo->got_indices = 0;
927                                 free(buffer.buffer);
928                                 return error("%s", curl_errorstr);
929                         }
930                 }
931         } else {
932                 repo->got_indices = 0;
933                 free(buffer.buffer);
934                 return error("Unable to start request");
935         }
936
937         data = buffer.buffer;
938         while (i < buffer.posn) {
939                 switch (data[i]) {
940                 case 'P':
941                         i++;
942                         if (i + 52 <= buffer.posn &&
943                             !strncmp(data + i, " pack-", 6) &&
944                             !strncmp(data + i + 46, ".pack\n", 6)) {
945                                 get_sha1_hex(data + i + 6, sha1);
946                                 setup_index(repo, sha1);
947                                 i += 51;
948                                 break;
949                         }
950                 default:
951                         while (i < buffer.posn && data[i] != '\n')
952                                 i++;
953                 }
954                 i++;
955         }
956
957         free(buffer.buffer);
958         repo->got_indices = 1;
959         return 0;
960 }
961
962 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
963 {
964         char *url;
965         struct packed_git *target;
966         struct packed_git **lst;
967         FILE *packfile;
968         char *filename;
969         char tmpfile[PATH_MAX];
970         int ret;
971         long prev_posn = 0;
972         char range[RANGE_HEADER_SIZE];
973         struct curl_slist *range_header = NULL;
974
975         struct active_request_slot *slot;
976         struct slot_results results;
977
978         if (fetch_indices(repo))
979                 return -1;
980         target = find_sha1_pack(sha1, repo->packs);
981         if (!target)
982                 return -1;
983
984         if (get_verbosely) {
985                 fprintf(stderr, "Getting pack %s\n",
986                         sha1_to_hex(target->sha1));
987                 fprintf(stderr, " which contains %s\n",
988                         sha1_to_hex(sha1));
989         }
990
991         url = xmalloc(strlen(repo->base) + 65);
992         sprintf(url, "%s/objects/pack/pack-%s.pack",
993                 repo->base, sha1_to_hex(target->sha1));
994
995         filename = sha1_pack_name(target->sha1);
996         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
997         packfile = fopen(tmpfile, "a");
998         if (!packfile)
999                 return error("Unable to open local file %s for pack",
1000                              filename);
1001
1002         slot = get_active_slot();
1003         slot->results = &results;
1004         curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1005         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1006         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1007         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1008         slot->local = packfile;
1009
1010         /* If there is data present from a previous transfer attempt,
1011            resume where it left off */
1012         prev_posn = ftell(packfile);
1013         if (prev_posn>0) {
1014                 if (get_verbosely)
1015                         fprintf(stderr,
1016                                 "Resuming fetch of pack %s at byte %ld\n",
1017                                 sha1_to_hex(target->sha1), prev_posn);
1018                 sprintf(range, "Range: bytes=%ld-", prev_posn);
1019                 range_header = curl_slist_append(range_header, range);
1020                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1021         }
1022
1023         if (start_active_slot(slot)) {
1024                 run_active_slot(slot);
1025                 if (results.curl_result != CURLE_OK) {
1026                         fclose(packfile);
1027                         return error("Unable to get pack file %s\n%s", url,
1028                                      curl_errorstr);
1029                 }
1030         } else {
1031                 fclose(packfile);
1032                 return error("Unable to start request");
1033         }
1034
1035         fclose(packfile);
1036
1037         ret = move_temp_to_file(tmpfile, filename);
1038         if (ret)
1039                 return ret;
1040
1041         lst = &repo->packs;
1042         while (*lst != target)
1043                 lst = &((*lst)->next);
1044         *lst = (*lst)->next;
1045
1046         if (verify_pack(target, 0))
1047                 return -1;
1048         install_packed_git(target);
1049
1050         return 0;
1051 }
1052
1053 static void abort_object_request(struct object_request *obj_req)
1054 {
1055         if (obj_req->local >= 0) {
1056                 close(obj_req->local);
1057                 obj_req->local = -1;
1058         }
1059         unlink(obj_req->tmpfile);
1060         if (obj_req->slot) {
1061                 release_active_slot(obj_req->slot);
1062                 obj_req->slot = NULL;
1063         }
1064         release_object_request(obj_req);
1065 }
1066
1067 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1068 {
1069         char *hex = sha1_to_hex(sha1);
1070         int ret = 0;
1071         struct object_request *obj_req = object_queue_head;
1072
1073         while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1074                 obj_req = obj_req->next;
1075         if (obj_req == NULL)
1076                 return error("Couldn't find request for %s in the queue", hex);
1077
1078         if (has_sha1_file(obj_req->sha1)) {
1079                 abort_object_request(obj_req);
1080                 return 0;
1081         }
1082
1083 #ifdef USE_CURL_MULTI
1084         while (obj_req->state == WAITING) {
1085                 step_active_slots();
1086         }
1087 #else
1088         start_object_request(obj_req);
1089 #endif
1090
1091         while (obj_req->state == ACTIVE) {
1092                 run_active_slot(obj_req->slot);
1093         }
1094         if (obj_req->local != -1) {
1095                 close(obj_req->local); obj_req->local = -1;
1096         }
1097
1098         if (obj_req->state == ABORTED) {
1099                 ret = error("Request for %s aborted", hex);
1100         } else if (obj_req->curl_result != CURLE_OK &&
1101                    obj_req->http_code != 416) {
1102                 if (obj_req->http_code == 404 ||
1103                     obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1104                         ret = -1; /* Be silent, it is probably in a pack. */
1105                 else
1106                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1107                                     obj_req->errorstr, obj_req->curl_result,
1108                                     obj_req->http_code, hex);
1109         } else if (obj_req->zret != Z_STREAM_END) {
1110                 corrupt_object_found++;
1111                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1112         } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1113                 ret = error("File %s has bad hash", hex);
1114         } else if (obj_req->rename < 0) {
1115                 ret = error("unable to write sha1 filename %s",
1116                             obj_req->filename);
1117         }
1118
1119         release_object_request(obj_req);
1120         return ret;
1121 }
1122
1123 int fetch(unsigned char *sha1)
1124 {
1125         struct alt_base *altbase = alt;
1126
1127         if (!fetch_object(altbase, sha1))
1128                 return 0;
1129         while (altbase) {
1130                 if (!fetch_pack(altbase, sha1))
1131                         return 0;
1132                 fetch_alternates(alt->base);
1133                 altbase = altbase->next;
1134         }
1135         return error("Unable to find %s under %s", sha1_to_hex(sha1),
1136                      alt->base);
1137 }
1138
/*
 * Tell whether character `ch` must be percent-encoded when it appears
 * in a ref URL.  Returns 0 for '/', '-', '.', ASCII letters and ASCII
 * digits, and 1 for everything else.
 *
 * Written with plain comparisons rather than `case 'A'...'Z'` ranges,
 * which are a GNU extension and not accepted by every C compiler.
 */
static inline int needs_quote(int ch)
{
        if ((ch >= 'A' && ch <= 'Z') ||
            (ch >= 'a' && ch <= 'z') ||
            (ch >= '0' && ch <= '9') ||
            ch == '/' || ch == '-' || ch == '.')
                return 0;
        return 1;
}
1149
/*
 * Map a nibble value to its upper-case hexadecimal digit character:
 * values below 10 become '0'..'9', larger ones continue from 'A'.
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1155
/*
 * Build the URL "<base>refs/<ref>" in freshly allocated memory, with
 * every character of `ref` for which needs_quote() is true replaced by
 * '%' followed by its two upper-case hex digits.  `base` is copied
 * verbatim.
 *
 * The caller owns the returned NUL-terminated string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *src;
        char *out, *result;
        int base_len, total, c;

        base_len = strlen(base);

        /* First pass: size the result.  6 covers "refs/" plus the NUL. */
        total = base_len + 6;
        for (src = ref; (c = *src) != 0; src++)
                total += needs_quote(c) ? 3 : 1;

        result = xmalloc(total);
        memcpy(result, base, base_len);
        out = result + base_len;
        memcpy(out, "refs/", 5);
        out += 5;

        /* Second pass: copy the ref, escaping where required. */
        for (src = ref; (c = *src) != 0; src++) {
                if (!needs_quote(c)) {
                        *out++ = c;
                        continue;
                }
                *out++ = '%';
                *out++ = hex((c >> 4) & 0xF);
                *out++ = hex(c & 0xF);
        }
        *out = 0;

        return result;
}
1183
1184 int fetch_ref(char *ref, unsigned char *sha1)
1185 {
1186         char *url;
1187         char hex[42];
1188         struct buffer buffer;
1189         char *base = alt->base;
1190         struct active_request_slot *slot;
1191         struct slot_results results;
1192         buffer.size = 41;
1193         buffer.posn = 0;
1194         buffer.buffer = hex;
1195         hex[41] = '\0';
1196
1197         url = quote_ref_url(base, ref);
1198         slot = get_active_slot();
1199         slot->results = &results;
1200         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1201         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1202         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1203         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1204         if (start_active_slot(slot)) {
1205                 run_active_slot(slot);
1206                 if (results.curl_result != CURLE_OK)
1207                         return error("Couldn't get %s for %s\n%s",
1208                                      url, ref, curl_errorstr);
1209         } else {
1210                 return error("Unable to start request");
1211         }
1212
1213         hex[40] = '\0';
1214         get_sha1_hex(hex, sha1);
1215         return 0;
1216 }
1217
1218 int main(int argc, char **argv)
1219 {
1220         char *commit_id;
1221         char *url;
1222         char *path;
1223         int arg = 1;
1224         int rc = 0;
1225
1226         setup_git_directory();
1227         git_config(git_default_config);
1228
1229         while (arg < argc && argv[arg][0] == '-') {
1230                 if (argv[arg][1] == 't') {
1231                         get_tree = 1;
1232                 } else if (argv[arg][1] == 'c') {
1233                         get_history = 1;
1234                 } else if (argv[arg][1] == 'a') {
1235                         get_all = 1;
1236                         get_tree = 1;
1237                         get_history = 1;
1238                 } else if (argv[arg][1] == 'v') {
1239                         get_verbosely = 1;
1240                 } else if (argv[arg][1] == 'w') {
1241                         write_ref = argv[arg + 1];
1242                         arg++;
1243                 } else if (!strcmp(argv[arg], "--recover")) {
1244                         get_recover = 1;
1245                 }
1246                 arg++;
1247         }
1248         if (argc < arg + 2) {
1249                 usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1250                 return 1;
1251         }
1252         commit_id = argv[arg];
1253         url = argv[arg + 1];
1254         write_ref_log_details = url;
1255
1256         http_init();
1257
1258         no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1259
1260         alt = xmalloc(sizeof(*alt));
1261         alt->base = url;
1262         alt->got_indices = 0;
1263         alt->packs = NULL;
1264         alt->next = NULL;
1265         path = strstr(url, "//");
1266         if (path) {
1267                 path = strchr(path+2, '/');
1268                 if (path)
1269                         alt->path_len = strlen(path);
1270         }
1271
1272         if (pull(commit_id))
1273                 rc = 1;
1274
1275         http_cleanup();
1276
1277         curl_slist_free_all(no_pragma_header);
1278
1279         if (corrupt_object_found) {
1280                 fprintf(stderr,
1281 "Some loose object were found to be corrupt, but they might be just\n"
1282 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1283 "status code.  Suggest running git fsck-objects.\n");
1284         }
1285         return rc;
1286 }