Fetch from a packed repository on dumb servers.
author Junio C Hamano <junkio@cox.net>
Tue, 26 Jul 2005 05:42:18 +0000 (22:42 -0700)
committer Junio C Hamano <junkio@cox.net>
Sun, 31 Jul 2005 18:56:44 +0000 (11:56 -0700)
Implement fetching from a packed repository over http/https
using the dumb server support files.

I think some parts of the logic belong in a separate C
program, but it appears to work in my simple tests.  I have
kept it on the back burner for a bit too long for my liking, so
let's throw it out in the open and see what happens.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Makefile
git-fetch-dumb-http [new file with mode: 0755]
git-fetch-script
missing-revs.c [new file with mode: 0644]

index 332cd7d..db52a85 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -65,7 +65,7 @@ SCRIPTS=git git-apply-patch-script git-merge-one-file-script git-prune-script \
        git-format-patch-script git-sh-setup-script git-push-script \
        git-branch-script git-parse-remote git-verify-tag-script \
        git-ls-remote-script git-clone-dumb-http git-rename-script \
-       git-request-pull-script git-bisect-script
+       git-request-pull-script git-bisect-script git-fetch-dumb-http
 
 PROG=   git-update-cache git-diff-files git-init-db git-write-tree \
        git-read-tree git-commit-tree git-cat-file git-fsck-cache \
@@ -79,7 +79,8 @@ PROG=   git-update-cache git-diff-files git-init-db git-write-tree \
        git-unpack-objects git-verify-pack git-receive-pack git-send-pack \
        git-prune-packed git-fetch-pack git-upload-pack git-clone-pack \
        git-show-index git-daemon git-var git-peek-remote \
-       git-update-server-info git-show-rev-cache git-build-rev-cache
+       git-update-server-info git-show-rev-cache git-build-rev-cache \
+       git-missing-revs
 
 ifndef NO_CURL
 PROG+= git-http-pull
diff --git a/git-fetch-dumb-http b/git-fetch-dumb-http
new file mode 100755 (executable)
index 0000000..0034296
--- /dev/null
@@ -0,0 +1,100 @@
+#!/bin/sh
+#
+# Copyright (c) 2005, Junio C Hamano
+#
+# Called by git-fetch-script
+# Exits 2 when the remote site does not support dumb server protocol.
+
+# Usage: git-fetch-dumb-http <head-SHA1> <repo> [ <head> | tag <tag> ]
+
+. git-sh-setup-script || die "Not a git archive"
+head="$1"
+shift
+. git-parse-remote "$@"
+
+merge_repo="$_remote_repo"
+merge_head="$_remote_head"
+merge_store="$_remote_store"
+
+if [ -n "$GIT_SSL_NO_VERIFY" ]; then
+    curl_extra_args="-k"
+fi
+http_fetch () {
+       # $1 = Remote, $2 = Local
+       curl -ns $curl_extra_args "$1" >"$2"
+}
+# Main flow: download the dumb-server support files, pick candidate packs, then fetch and install them.
+# Try dumb server protocol, working in a per-process scratch directory under .git.
+# Failing to create that directory exits 1; failing any of the three downloads exits 2.
+clone_tmp=".git/clone-tmp$$" &&
+mkdir -p "$clone_tmp" || exit 1
+trap "rm -rf $clone_tmp" 0 1 2 3 15	# scratch directory is removed on exit and on signals 1 2 3 15
+http_fetch "$merge_repo/info/refs" "$clone_tmp/refs" &&
+http_fetch "$merge_repo/objects/info/packs" "$clone_tmp/packs" &&
+http_fetch "$merge_repo/info/rev-cache" "$clone_tmp/rev-cache" || exit 2
+# (The downloaded refs file is not read again by the rest of this script.)
+# Which packs are we interested in?  For each line tagged T, remember its number when git-cat-file cannot type the object.
+has_missing=,	# kept comma-delimited so that ",N," can be pattern-matched below
+while read tag num sha1 type
+do
+    case "$tag" in
+    T) ;;
+    *) continue ;;
+    esac
+    git-cat-file -t "$sha1" >/dev/null || has_missing="$has_missing$num,"
+done <$clone_tmp/packs
+# pack_ix counts the P lines from 0; the numbers collected above are matched against it.
+# Slurp the pack index we do not have all objects for; its listing minus the first field becomes <name>.toc.
+pack_ix=0
+may_want_pack_count=0
+while read tag pack
+do
+    case "$tag" in
+    P) ;;
+    *) break ;; # P records always come first.
+    esac
+    case "$has_missing" in
+    *",$pack_ix,"*)
+       name=`expr "$pack" : '\(.*\)\.pack$'` &&
+       idx="$name.idx" &&
+       http_fetch "$merge_repo/objects/pack/$idx" "$clone_tmp/$idx" &&
+       # Note that idx file is sorted --- otherwise we need to sort it here.
+       git-show-index <"$clone_tmp/$idx" |
+       sed -e 's/^[^ ]* //' >"$clone_tmp/$name.toc" ||
+       exit 1
+       may_want_pack_count=`expr "$may_want_pack_count" + 1`
+       ;;
+    esac
+    pack_ix=`expr "$pack_ix" + 1`
+done <$clone_tmp/packs
+# No pack index was selected above, so there is nothing to download: report success.
+case "$may_want_pack_count" in
+0)
+    exit 0 ;;
+esac
+# The git-missing-revs output for $head is sorted in place so that comm(1) can compare it with the .toc files.
+# We want $head.  What are the head objects we are missing?
+git-missing-revs $clone_tmp/rev-cache $head >$clone_tmp/missing-revs &&
+sort -o $clone_tmp/missing-revs $clone_tmp/missing-revs || exit 2
+# Fetch, verify and install every pack whose .toc shares at least one line with missing-revs.
+for toc in $clone_tmp/*.toc
+do
+    name=`expr $toc : '.*/\([^/]*\)\.toc'` &&
+    comm -12 $clone_tmp/missing-revs $toc >$clone_tmp/$name.can
+    # FIXME: this is stupid.
+    if test -s $clone_tmp/$name.can
+    then
+       pack="$name.pack" idx="$name.idx" &&
+       http_fetch "$merge_repo/objects/pack/$pack" "$clone_tmp/$pack" &&
+       git-verify-pack "$clone_tmp/$pack" &&
+       mkdir -p "$GIT_OBJECT_DIRECTORY/pack" &&
+       mv "$clone_tmp/$pack" "$clone_tmp/$idx" \
+           "$GIT_OBJECT_DIRECTORY/pack/" || {
+           # The remote may just have stale dumb server information files,
+           # and a normal pull might still succeed, so exit 2 rather than 1.
+           exit 2
+       }
+    fi
+done
+# Every candidate pack was handled without an error.
+exit 0
index 34ddfc8..2040c12 100755 (executable)
@@ -13,9 +13,29 @@ http://* | https://*)
         if [ -n "$GIT_SSL_NO_VERIFY" ]; then
             curl_extra_args="-k"
         fi
-       head=$(curl -ns $curl_extra_args "$merge_repo/$merge_head") || exit 1
+       _x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]' &&
+       _x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40" &&
+       head=$(curl -ns $curl_extra_args "$merge_repo/$merge_head") &&
+       expr "$head" : "$_x40\$" >/dev/null || {
+               echo >&2 "Failed to fetch $merge_head from $merge_repo"
+               exit 1
+       }
+       git-fetch-dumb-http "$head" "$@"
+       case "$?" in
+       0) ;;
+       2) no_dumb_http_support=1 ;;
+       *) exit;;
+       esac
        echo Fetching "$merge_head" using http
-       git-http-pull -v -a "$head" "$merge_repo/"
+       git-http-pull -v -a "$head" "$merge_repo/" || {
+           case "$no_dumb_http_support" in
+           1)
+               echo >&2 "* This could be because the $merge_repo is packed without"
+               echo >&2 "  preparing dumb server support files."
+               ;;
+           esac
+           exit 1
+       }
        ;;
 rsync://*)
        rsync -L "$merge_repo/$merge_head" "$TMP_HEAD" || exit 1
diff --git a/missing-revs.c b/missing-revs.c
new file mode 100644 (file)
index 0000000..afe41e3
--- /dev/null
@@ -0,0 +1,63 @@
+#include "cache.h"
+#include "rev-cache.h"
+
+static const char missing_revs_usage[] =
+"git-missing-revs <rev-cache-file> <want-sha1>...";
+
+#define REV_WANT 01
+#define REV_HAVE 02
+/*
+ * Drain the list of pending entries chained through ->head_list and print
+ * the hex SHA1 of each entry that is not available locally.
+ *
+ * An entry for which has_sha1_file() succeeds is flagged REV_HAVE and is
+ * not followed any further.  Any other entry that carries neither flag yet
+ * is flagged REV_WANT, printed on its own line, and its parents (except
+ * those already flagged REV_HAVE) are pushed onto the front of the list.
+ *
+ * NOTE(review): pushing a parent overwrites its ->head_list link without
+ * checking whether that parent is already queued; this looks like it could
+ * cut off the entries queued behind it -- confirm.
+ */
+static void process(struct rev_cache *head_list)
+{
+       while (head_list) {
+               struct rev_cache *rc = head_list;
+               struct rev_list_elem *e;
+               head_list = rc->head_list;	/* pop the front entry */
+               rc->head_list = NULL;
+               if (has_sha1_file(rc->sha1)) {
+                       rc->work |= REV_HAVE;
+                       continue;
+               }
+               if (rc->work & (REV_WANT|REV_HAVE))	/* already flagged on an earlier visit */
+                       continue;
+               rc->work |= REV_WANT;
+               printf("%s\n", sha1_to_hex(rc->sha1));
+               for (e = rc->parents; e; e = e->next) {
+                       if (e->ri->work & REV_HAVE)
+                               continue;
+                       e->ri->head_list = head_list;	/* push the parent on the front */
+                       head_list = e->ri;
+               }
+       }
+}
+/*
+ * Usage: git-missing-revs <rev-cache-file> <want-sha1>...
+ *
+ * Reads the rev-cache file named by av[1], then treats every remaining
+ * argument as a hex SHA1.  An argument that get_sha1_hex() rejects is
+ * reported through die().  An SHA1 that find_rev_cache() does not locate
+ * is echoed back unchanged; the others are chained through ->head_list
+ * and handed to process().  Fewer than two arguments yields the usage
+ * message.
+ */
+int main(int ac, char **av)
+{
+       const char *rev_cache_file;
+       struct rev_cache *head_list = NULL;
+       int i;
+
+       if (ac < 3)
+               usage(missing_revs_usage);
+       rev_cache_file = av[1];
+       read_rev_cache(rev_cache_file, NULL, 0);
+       for (i = 2; i < ac; i++) {
+               unsigned char sha1[20];
+               int pos;
+               struct rev_cache *rc;
+               if (get_sha1_hex(av[i], sha1))
+                       die("%s: not an SHA1", av[i]);
+               if ((pos = find_rev_cache(sha1)) < 0) {
+                       /* We could be asked for tags, which would not
+                        * appear in the rev-cache.
+                        */
+                       puts(av[i]);
+                       continue;
+               }
+               rc = rev_cache[pos];
+               rc->head_list = head_list;	/* push onto the front of the pending list */
+               head_list = rc;
+       }
+       process(head_list);
+       return 0;
+}