[PATCH] HTTP partial transfer support for object, pack, and index transfers
[git.git] / http-fetch.c
index 4e564fc..778d508 100644 (file)
 #define curl_global_init(a) do { /* nothing */ } while(0)
 #endif
 
+#define PREV_BUF_SIZE 4096
+#define RANGE_HEADER_SIZE 30
+
 static CURL *curl;
+static struct curl_slist *no_pragma_header;
+static struct curl_slist *no_range_header;
+static char curl_errorstr[CURL_ERROR_SIZE];
+
+static char *initial_base;
+
+struct alt_base
+{
+       char *base;
+       int got_indices;
+       struct packed_git *packs;
+       struct alt_base *next;
+};
 
-static char *base;
+static struct alt_base *alt = NULL;
 
 static SHA_CTX c;
 static z_stream stream;
@@ -24,6 +40,10 @@ static int local;
 static int zret;
 
 static int curl_ssl_verify;
+static char *ssl_cert;
+static char *ssl_key;
+static char *ssl_capath;
+static char *ssl_cainfo;
 
 struct buffer
 {
@@ -71,14 +91,37 @@ void prefetch(unsigned char *sha1)
 {
 }
 
-static int got_indices = 0;
+int relink_or_rename(char *old, char *new) {
+       int ret;
+
+       ret = link(old, new);
+       if (ret < 0) {
+               /* Same Coda hack as in write_sha1_file(sha1_file.c) */
+               ret = errno;
+               if (ret == EXDEV && !rename(old, new))
+                       return 0;
+       }
+       unlink(old);
+       if (ret) {
+               if (ret != EEXIST)
+                       return ret;
+       }
+
+       return 0;
+}
 
-static struct packed_git *packs = NULL;
+static int got_alternates = 0;
 
-static int fetch_index(unsigned char *sha1)
+static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 {
        char *filename;
        char *url;
+       char tmpfile[PATH_MAX];
+       int ret;
+       long prev_posn = 0;
+       char range[RANGE_HEADER_SIZE];
+       struct curl_slist *range_header = NULL;
+       CURLcode curl_result;
 
        FILE *indexfile;
 
@@ -89,12 +132,13 @@ static int fetch_index(unsigned char *sha1)
                fprintf(stderr, "Getting index for pack %s\n",
                        sha1_to_hex(sha1));
        
-       url = xmalloc(strlen(base) + 64);
+       url = xmalloc(strlen(repo->base) + 64);
        sprintf(url, "%s/objects/pack/pack-%s.idx",
-               base, sha1_to_hex(sha1));
+               repo->base, sha1_to_hex(sha1));
        
        filename = sha1_pack_index_name(sha1);
-       indexfile = fopen(filename, "w");
+       snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+       indexfile = fopen(tmpfile, "a");
        if (!indexfile)
                return error("Unable to open local file %s for pack index",
                             filename);
@@ -102,32 +146,159 @@ static int fetch_index(unsigned char *sha1)
        curl_easy_setopt(curl, CURLOPT_FILE, indexfile);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite);
        curl_easy_setopt(curl, CURLOPT_URL, url);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_pragma_header);
+       curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errorstr);
        
-       if (curl_easy_perform(curl)) {
+       /* If there is data present from a previous transfer attempt,
+          resume where it left off */
+       prev_posn = ftell(indexfile);
+       if (prev_posn>0) {
+               if (get_verbosely)
+                       fprintf(stderr,
+                               "Resuming fetch of index for pack %s at byte %ld\n",
+                               sha1_to_hex(sha1), prev_posn);
+               sprintf(range, "Range: bytes=%ld-", prev_posn);
+               range_header = curl_slist_append(range_header, range);
+               curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+       }
+
+       /* Clear out the Range: header after performing the request, so
+          other curl requests don't inherit inappropriate header data */
+       curl_result = curl_easy_perform(curl);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+       if (curl_result != 0) {
                fclose(indexfile);
-               return error("Unable to get pack index %s", url);
+               return error("Unable to get pack index %s\n%s", url,
+                            curl_errorstr);
        }
 
        fclose(indexfile);
+
+       ret = relink_or_rename(tmpfile, filename);
+       if (ret)
+               return error("unable to write index filename %s: %s",
+                            filename, strerror(ret));
+
        return 0;
 }
 
-static int setup_index(unsigned char *sha1)
+static int setup_index(struct alt_base *repo, unsigned char *sha1)
 {
        struct packed_git *new_pack;
        if (has_pack_file(sha1))
                return 0; // don't list this as something we can get
 
-       if (fetch_index(sha1))
+       if (fetch_index(repo, sha1))
                return -1;
 
        new_pack = parse_pack_index(sha1);
-       new_pack->next = packs;
-       packs = new_pack;
+       new_pack->next = repo->packs;
+       repo->packs = new_pack;
        return 0;
 }
 
-static int fetch_indices(void)
+static int fetch_alternates(char *base)
+{
+       int ret = 0;
+       struct buffer buffer;
+       char *url;
+       char *data;
+       int i = 0;
+       int http_specific = 1;
+       if (got_alternates)
+               return 0;
+       data = xmalloc(4096);
+       buffer.size = 4095;
+       buffer.posn = 0;
+       buffer.buffer = data;
+
+       if (get_verbosely)
+               fprintf(stderr, "Getting alternates list\n");
+       
+       url = xmalloc(strlen(base) + 31);
+       sprintf(url, "%s/objects/info/http-alternates", base);
+
+       curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
+       curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
+       curl_easy_setopt(curl, CURLOPT_URL, url);
+
+       if (curl_easy_perform(curl) || !buffer.posn) {
+               http_specific = 0;
+
+               sprintf(url, "%s/objects/info/alternates", base);
+               
+               curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
+               curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
+               curl_easy_setopt(curl, CURLOPT_URL, url);
+               
+               if (curl_easy_perform(curl)) {
+                       return 0;
+               }
+       }
+
+       data[buffer.posn] = '\0';
+
+       while (i < buffer.posn) {
+               int posn = i;
+               while (posn < buffer.posn && data[posn] != '\n')
+                       posn++;
+               if (data[posn] == '\n') {
+                       int okay = 0;
+                       int serverlen = 0;
+                       struct alt_base *newalt;
+                       char *target = NULL;
+                       if (data[i] == '/') {
+                               serverlen = strchr(base + 8, '/') - base;
+                               okay = 1;
+                       } else if (!memcmp(data + i, "../", 3)) {
+                               i += 3;
+                               serverlen = strlen(base);
+                               while (i + 2 < posn && 
+                                      !memcmp(data + i, "../", 3)) {
+                                       do {
+                                               serverlen--;
+                                       } while (serverlen &&
+                                                base[serverlen - 1] != '/');
+                                       i += 3;
+                               }
+                               // If the server got removed, give up.
+                               okay = strchr(base, ':') - base + 3 < 
+                                       serverlen;
+                       } else if (http_specific) {
+                               char *colon = strchr(data + i, ':');
+                               char *slash = strchr(data + i, '/');
+                               if (colon && slash && colon < data + posn &&
+                                   slash < data + posn && colon < slash) {
+                                       okay = 1;
+                               }
+                       }
+                       // skip 'objects' at end
+                       if (okay) {
+                               target = xmalloc(serverlen + posn - i - 6);
+                               strncpy(target, base, serverlen);
+                               strncpy(target + serverlen, data + i,
+                                       posn - i - 7);
+                               target[serverlen + posn - i - 7] = '\0';
+                               if (get_verbosely)
+                                       fprintf(stderr, 
+                                               "Also look at %s\n", target);
+                               newalt = xmalloc(sizeof(*newalt));
+                               newalt->next = alt;
+                               newalt->base = target;
+                               newalt->got_indices = 0;
+                               newalt->packs = NULL;
+                               alt = newalt;
+                               ret++;
+                       }
+               }
+               i = posn + 1;
+       }
+       got_alternates = 1;
+       
+       return ret;
+}
+
+static int fetch_indices(struct alt_base *repo)
 {
        unsigned char sha1[20];
        char *url;
@@ -135,7 +306,7 @@ static int fetch_indices(void)
        char *data;
        int i = 0;
 
-       if (got_indices)
+       if (repo->got_indices)
                return 0;
 
        data = xmalloc(4096);
@@ -146,18 +317,19 @@ static int fetch_indices(void)
        if (get_verbosely)
                fprintf(stderr, "Getting pack list\n");
        
-       url = xmalloc(strlen(base) + 21);
-       sprintf(url, "%s/objects/info/packs", base);
+       url = xmalloc(strlen(repo->base) + 21);
+       sprintf(url, "%s/objects/info/packs", repo->base);
 
        curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
        curl_easy_setopt(curl, CURLOPT_URL, url);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
+       curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errorstr);
        
-       if (curl_easy_perform(curl)) {
-               return error("Unable to get pack index %s", url);
-       }
+       if (curl_easy_perform(curl))
+               return error("%s", curl_errorstr);
 
-       do {
+       while (i < buffer.posn) {
                switch (data[i]) {
                case 'P':
                        i++;
@@ -165,7 +337,7 @@ static int fetch_indices(void)
                            !strncmp(data + i, " pack-", 6) &&
                            !strncmp(data + i + 46, ".pack\n", 6)) {
                                get_sha1_hex(data + i + 6, sha1);
-                               setup_index(sha1);
+                               setup_index(repo, sha1);
                                i += 51;
                                break;
                        }
@@ -174,26 +346,31 @@ static int fetch_indices(void)
                                i++;
                }
                i++;
-       } while (i < buffer.posn);
+       }
 
-       got_indices = 1;
+       repo->got_indices = 1;
        return 0;
 }
 
-static int fetch_pack(unsigned char *sha1)
+static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 {
        char *url;
        struct packed_git *target;
        struct packed_git **lst;
        FILE *packfile;
        char *filename;
-
-       if (fetch_indices())
+       char tmpfile[PATH_MAX];
+       int ret;
+       long prev_posn = 0;
+       char range[RANGE_HEADER_SIZE];
+       struct curl_slist *range_header = NULL;
+       CURLcode curl_result;
+
+       if (fetch_indices(repo))
                return -1;
-       target = find_sha1_pack(sha1, packs);
+       target = find_sha1_pack(sha1, repo->packs);
        if (!target)
-               return error("Couldn't get %s: not separate or in any pack",
-                            sha1_to_hex(sha1));
+               return -1;
 
        if (get_verbosely) {
                fprintf(stderr, "Getting pack %s\n",
@@ -202,12 +379,13 @@ static int fetch_pack(unsigned char *sha1)
                        sha1_to_hex(sha1));
        }
 
-       url = xmalloc(strlen(base) + 65);
+       url = xmalloc(strlen(repo->base) + 65);
        sprintf(url, "%s/objects/pack/pack-%s.pack",
-               base, sha1_to_hex(target->sha1));
+               repo->base, sha1_to_hex(target->sha1));
 
        filename = sha1_pack_name(target->sha1);
-       packfile = fopen(filename, "w");
+       snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+       packfile = fopen(tmpfile, "a");
        if (!packfile)
                return error("Unable to open local file %s for pack",
                             filename);
@@ -215,15 +393,40 @@ static int fetch_pack(unsigned char *sha1)
        curl_easy_setopt(curl, CURLOPT_FILE, packfile);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite);
        curl_easy_setopt(curl, CURLOPT_URL, url);
-       
-       if (curl_easy_perform(curl)) {
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_pragma_header);
+       curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errorstr);
+
+       /* If there is data present from a previous transfer attempt,
+          resume where it left off */
+       prev_posn = ftell(packfile);
+       if (prev_posn>0) {
+               if (get_verbosely)
+                       fprintf(stderr,
+                               "Resuming fetch of pack %s at byte %ld\n",
+                               sha1_to_hex(target->sha1), prev_posn);
+               sprintf(range, "Range: bytes=%ld-", prev_posn);
+               range_header = curl_slist_append(range_header, range);
+               curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+       }
+
+       /* Clear out the Range: header after performing the request, so
+          other curl requests don't inherit inappropriate header data */
+       curl_result = curl_easy_perform(curl);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+       if (curl_result != 0) {
                fclose(packfile);
-               return error("Unable to get pack file %s", url);
+               return error("Unable to get pack file %s\n%s", url,
+                            curl_errorstr);
        }
 
        fclose(packfile);
 
-       lst = &packs;
+       ret = relink_or_rename(tmpfile, filename);
+       if (ret)
+               return error("unable to write pack filename %s: %s",
+                            filename, strerror(ret));
+
+       lst = &repo->packs;
        while (*lst != target)
                lst = &((*lst)->next);
        *lst = (*lst)->next;
@@ -233,18 +436,38 @@ static int fetch_pack(unsigned char *sha1)
        return 0;
 }
 
-int fetch(unsigned char *sha1)
+static int fetch_object(struct alt_base *repo, unsigned char *sha1)
 {
        char *hex = sha1_to_hex(sha1);
        char *filename = sha1_file_name(sha1);
        unsigned char real_sha1[20];
+       char tmpfile[PATH_MAX];
+       char prevfile[PATH_MAX];
+       int ret;
        char *url;
        char *posn;
+       int prevlocal;
+       unsigned char prev_buf[PREV_BUF_SIZE];
+       ssize_t prev_read = 0;
+       long prev_posn = 0;
+       char range[RANGE_HEADER_SIZE];
+       struct curl_slist *range_header = NULL;
+       CURLcode curl_result;
+
+       snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+       snprintf(prevfile, sizeof(prevfile), "%s.prev", filename);
+       unlink(prevfile);
+       rename(tmpfile, prevfile);
+       unlink(tmpfile);
+
+       local = open(tmpfile, O_WRONLY | O_CREAT | O_EXCL, 0666);
 
-       local = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);
+       /* Note: if another instance starts now, it will turn our new
+          tmpfile into its prevfile. */
 
        if (local < 0)
-               return error("Couldn't open local object %s\n", filename);
+               return error("Couldn't create temporary file %s for %s: %s\n",
+                            tmpfile, filename, strerror(errno));
 
        memset(&stream, 0, sizeof(stream));
 
@@ -255,10 +478,12 @@ int fetch(unsigned char *sha1)
        curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
        curl_easy_setopt(curl, CURLOPT_FILE, NULL);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_pragma_header);
+       curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errorstr);
 
-       url = xmalloc(strlen(base) + 50);
-       strcpy(url, base);
-       posn = url + strlen(base);
+       url = xmalloc(strlen(repo->base) + 50);
+       strcpy(url, repo->base);
+       posn = url + strlen(repo->base);
        strcpy(posn, "objects/");
        posn += 8;
        memcpy(posn, hex, 2);
@@ -268,34 +493,105 @@ int fetch(unsigned char *sha1)
 
        curl_easy_setopt(curl, CURLOPT_URL, url);
 
-       if (curl_easy_perform(curl)) {
-               unlink(filename);
-               if (fetch_pack(sha1))
-                       return error("Tried %s", url);
-               return 0;
+       /* If a previous temp file is present, process what was already
+          fetched. */
+       prevlocal = open(prevfile, O_RDONLY);
+       if (prevlocal != -1) {
+               do {
+                       prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
+                       if (prev_read>0) {
+                               if (fwrite_sha1_file(prev_buf,
+                                                    1,
+                                                    prev_read,
+                                                    NULL) == prev_read) {
+                                       prev_posn += prev_read;
+                               } else {
+                                       prev_read = -1;
+                               }
+                       }
+               } while (prev_read > 0);
+               close(prevlocal);
+       }
+       unlink(prevfile);
+
+       /* Reset inflate/SHA1 if there was an error reading the previous temp
+          file; also rewind to the beginning of the local file. */
+       if (prev_read == -1) {
+               memset(&stream, 0, sizeof(stream));
+               inflateInit(&stream);
+               SHA1_Init(&c);
+               if (prev_posn>0) {
+                       prev_posn = 0;
+                       lseek(local, SEEK_SET, 0);
+               }
        }
 
+       /* If we have successfully processed data from a previous fetch
+          attempt, only fetch the data we don't already have. */
+       if (prev_posn>0) {
+               if (get_verbosely)
+                       fprintf(stderr,
+                               "Resuming fetch of object %s at byte %ld\n",
+                               hex, prev_posn);
+               sprintf(range, "Range: bytes=%ld-", prev_posn);
+               range_header = curl_slist_append(range_header, range);
+               curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+       }
+
+       /* Clear out the Range: header after performing the request, so
+          other curl requests don't inherit inappropriate header data */
+       curl_result = curl_easy_perform(curl);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+       if (curl_result != 0) {
+               unlink(tmpfile);
+               return error("%s", curl_errorstr);
+       }
+
+       fchmod(local, 0444);
        close(local);
        inflateEnd(&stream);
        SHA1_Final(real_sha1, &c);
        if (zret != Z_STREAM_END) {
-               unlink(filename);
+               unlink(tmpfile);
                return error("File %s (%s) corrupt\n", hex, url);
        }
        if (memcmp(sha1, real_sha1, 20)) {
-               unlink(filename);
+               unlink(tmpfile);
                return error("File %s has bad hash\n", hex);
        }
-       
+       ret = relink_or_rename(tmpfile, filename);
+       if (ret)
+               return error("unable to write sha1 filename %s: %s",
+                            filename, strerror(ret));
+
        pull_say("got %s\n", hex);
        return 0;
 }
 
+int fetch(unsigned char *sha1)
+{
+       struct alt_base *altbase = alt;
+       while (altbase) {
+               if (!fetch_object(altbase, sha1))
+                       return 0;
+               if (!fetch_pack(altbase, sha1))
+                       return 0;
+               if (fetch_alternates(altbase->base) > 0) {
+                       altbase = alt;
+                       continue;
+               }
+               altbase = altbase->next;
+       }
+       return error("Unable to find %s under %s\n", sha1_to_hex(sha1), 
+                    initial_base);
+}
+
 int fetch_ref(char *ref, unsigned char *sha1)
 {
         char *url, *posn;
         char hex[42];
         struct buffer buffer;
+       char *base = initial_base;
         buffer.size = 41;
         buffer.posn = 0;
         buffer.buffer = hex;
@@ -303,6 +599,8 @@ int fetch_ref(char *ref, unsigned char *sha1)
         
         curl_easy_setopt(curl, CURLOPT_FILE, &buffer);
         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
+       curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errorstr);
 
         url = xmalloc(strlen(base) + 6 + strlen(ref));
         strcpy(url, base);
@@ -314,7 +612,8 @@ int fetch_ref(char *ref, unsigned char *sha1)
         curl_easy_setopt(curl, CURLOPT_URL, url);
 
         if (curl_easy_perform(curl))
-                return error("Couldn't get %s for %s\n", url, ref);
+                return error("Couldn't get %s for %s\n%s",
+                            url, ref, curl_errorstr);
 
         hex[40] = '\0';
         get_sha1_hex(hex, sha1);
@@ -341,6 +640,8 @@ int main(int argc, char **argv)
                } else if (argv[arg][1] == 'w') {
                        write_ref = argv[arg + 1];
                        arg++;
+               } else if (!strcmp(argv[arg], "--recover")) {
+                       get_recover = 1;
                }
                arg++;
        }
@@ -354,6 +655,8 @@ int main(int argc, char **argv)
        curl_global_init(CURL_GLOBAL_ALL);
 
        curl = curl_easy_init();
+       no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
+       no_range_header = curl_slist_append(no_range_header, "Range:");
 
        curl_ssl_verify = getenv("GIT_SSL_NO_VERIFY") ? 0 : 1;
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, curl_ssl_verify);
@@ -361,11 +664,34 @@ int main(int argc, char **argv)
        curl_easy_setopt(curl, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
 #endif
 
-       base = url;
+       if ((ssl_cert = getenv("GIT_SSL_CERT")) != NULL) {
+               curl_easy_setopt(curl, CURLOPT_SSLCERT, ssl_cert);
+       }
+#if LIBCURL_VERSION_NUM >= 0x070902
+       if ((ssl_key = getenv("GIT_SSL_KEY")) != NULL) {
+               curl_easy_setopt(curl, CURLOPT_SSLKEY, ssl_key);
+       }
+#endif
+#if LIBCURL_VERSION_NUM >= 0x070908
+       if ((ssl_capath = getenv("GIT_SSL_CAPATH")) != NULL) {
+               curl_easy_setopt(curl, CURLOPT_CAPATH, ssl_capath);
+       }
+#endif
+       if ((ssl_cainfo = getenv("GIT_SSL_CAINFO")) != NULL) {
+               curl_easy_setopt(curl, CURLOPT_CAINFO, ssl_cainfo);
+       }
+
+       alt = xmalloc(sizeof(*alt));
+       alt->base = url;
+       alt->got_indices = 0;
+       alt->packs = NULL;
+       alt->next = NULL;
+       initial_base = url;
 
        if (pull(commit_id))
                return 1;
 
+       curl_slist_free_all(no_pragma_header);
        curl_global_cleanup();
        return 0;
 }