Merge branch 'master' into next
authorJunio C Hamano <junkio@cox.net>
Fri, 24 Feb 2006 10:22:01 +0000 (02:22 -0800)
committerJunio C Hamano <junkio@cox.net>
Fri, 24 Feb 2006 10:22:01 +0000 (02:22 -0800)
* master:
  Merge fixes early for next maint series.
  Merge branch 'fix' into maint
  git-am: do not allow empty commits by mistake.

13 files changed:
Makefile
blame.c [new file with mode: 0644]
count-delta.c
diff-delta.c
diffcore.h
fetch-pack.c
git-fetch.sh
git-push.sh
pack-objects.c
rev-list.c
rev-parse.c
send-pack.c
upload-pack.c

index e79aa96..8e6bbce 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -165,7 +165,7 @@ PROGRAMS = \
        git-upload-pack$X git-verify-pack$X git-write-tree$X \
        git-update-ref$X git-symbolic-ref$X git-check-ref-format$X \
        git-name-rev$X git-pack-redundant$X git-repo-config$X git-var$X \
-       git-describe$X git-merge-tree$X
+       git-describe$X git-merge-tree$X git-blame$X
 
 # what 'all' will build and 'install' will install, in gitexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS)
diff --git a/blame.c b/blame.c
new file mode 100644 (file)
index 0000000..1e65546
--- /dev/null
+++ b/blame.c
@@ -0,0 +1,443 @@
+#include <assert.h>
+
+#include "cache.h"
+#include "refs.h"
+#include "tag.h"
+#include "commit.h"
+#include "tree.h"
+#include "blob.h"
+#include "epoch.h"
+#include "diff.h"
+
+#define DEBUG 0
+
+struct commit** blame_lines;
+int num_blame_lines;
+
+struct util_info
+{
+    int* line_map;
+    int num_lines;
+    unsigned char sha1[20]; /* blob sha, not commit! */
+    char* buf;
+    unsigned long size;
+//    const char* path;
+};
+
+struct chunk
+{
+    int off1, len1; // ---
+    int off2, len2; // +++
+};
+
+struct patch
+{
+    struct chunk* chunks;
+    int num;
+};
+
+static void get_blob(struct commit* commit);
+
+int num_get_patch = 0;
+int num_commits = 0;
+
+struct patch* get_patch(struct commit* commit, struct commit* other)
+{
+    struct patch* ret = xmalloc(sizeof(struct patch));
+    ret->chunks = NULL;
+    ret->num = 0;
+
+    struct util_info* info_c = (struct util_info*) commit->object.util;
+    struct util_info* info_o = (struct util_info*) other->object.util;
+
+    if(!memcmp(info_c->sha1, info_o->sha1, 20))
+        return ret;
+
+    get_blob(commit);
+    get_blob(other);
+
+    FILE* fout = fopen("/tmp/git-blame-tmp1", "w");
+    if(!fout)
+        die("fopen tmp1 failed: %s", strerror(errno));
+
+    if(fwrite(info_c->buf, info_c->size, 1, fout) != 1)
+        die("fwrite 1 failed: %s", strerror(errno));
+    fclose(fout);
+
+    fout = fopen("/tmp/git-blame-tmp2", "w");
+    if(!fout)
+        die("fopen tmp2 failed: %s", strerror(errno));
+
+    if(fwrite(info_o->buf, info_o->size, 1, fout) != 1)
+        die("fwrite 2 failed: %s", strerror(errno));
+    fclose(fout);
+
+    FILE* fin = popen("diff -u0 /tmp/git-blame-tmp1 /tmp/git-blame-tmp2", "r");
+    if(!fin)
+        die("popen failed: %s", strerror(errno));
+
+    char buf[1024];
+    while(fgets(buf, sizeof(buf), fin)) {
+        if(buf[0] != '@' || buf[1] != '@')
+            continue;
+
+        if(DEBUG)
+            printf("chunk line: %s", buf);
+        ret->num++;
+        ret->chunks = xrealloc(ret->chunks, sizeof(struct chunk)*ret->num);
+        struct chunk* chunk = &ret->chunks[ret->num-1];
+
+        assert(!strncmp(buf, "@@ -", 4));
+
+        char* start = buf+4;
+        char* sp = index(start, ' ');
+        *sp = '\0';
+        if(index(start, ',')) {
+            int ret = sscanf(start, "%d,%d", &chunk->off1, &chunk->len1);
+            assert(ret == 2);
+        } else {
+            int ret = sscanf(start, "%d", &chunk->off1);
+            assert(ret == 1);
+            chunk->len1 = 1;
+        }
+        *sp = ' ';
+
+        start = sp+1;
+        sp = index(start, ' ');
+        *sp = '\0';
+        if(index(start, ',')) {
+            int ret = sscanf(start, "%d,%d", &chunk->off2, &chunk->len2);
+            assert(ret == 2);
+        } else {
+            int ret = sscanf(start, "%d", &chunk->off2);
+            assert(ret == 1);
+            chunk->len2 = 1;
+        }
+        *sp = ' ';
+
+        if(chunk->off1 > 0)
+            chunk->off1 -= 1;
+        if(chunk->off2 > 0)
+            chunk->off2 -= 1;
+
+        assert(chunk->off1 >= 0);
+        assert(chunk->off2 >= 0);
+    }
+    fclose(fin);
+
+    num_get_patch++;
+    return ret;
+}
+
+void free_patch(struct patch* p)
+{
+    free(p->chunks);
+    free(p);
+}
+
+static int get_blob_sha1_internal(unsigned char *sha1, const char *base, int baselen,
+                                  const char *pathname, unsigned mode, int stage);
+
+
+static unsigned char blob_sha1[20];
+static int get_blob_sha1(struct tree* t, const char* pathname, unsigned char* sha1)
+{
+    const char *pathspec[2];
+    pathspec[0] = pathname;
+    pathspec[1] = NULL;
+    memset(blob_sha1, 0, sizeof(blob_sha1));
+    read_tree_recursive(t, "", 0, 0, pathspec, get_blob_sha1_internal);
+
+    int i;
+    for(i = 0; i < 20; i++) {
+        if(blob_sha1[i] != 0)
+            break;
+    }
+
+    if(i == 20)
+        return -1;
+
+    memcpy(sha1, blob_sha1, 20);
+    return 0;
+}
+
+static int get_blob_sha1_internal(unsigned char *sha1, const char *base, int baselen,
+                                  const char *pathname, unsigned mode, int stage)
+{
+//    printf("Got blob: %s base: '%s' baselen: %d pathname: '%s' mode: %o stage: %d\n",
+//           sha1_to_hex(sha1), base, baselen, pathname, mode, stage);
+
+    if(S_ISDIR(mode))
+        return READ_TREE_RECURSIVE;
+
+    memcpy(blob_sha1, sha1, 20);
+    return -1;
+}
+
+static void get_blob(struct commit* commit)
+{
+    struct util_info* info = commit->object.util;
+    char type[20];
+
+    if(info->buf)
+        return;
+
+    info->buf = read_sha1_file(info->sha1, type, &info->size);
+    assert(!strcmp(type, "blob"));
+}
+
+void print_patch(struct patch* p)
+{
+    printf("Num chunks: %d\n", p->num);
+    int i;
+    for(i = 0; i < p->num; i++) {
+        printf("%d,%d %d,%d\n", p->chunks[i].off1, p->chunks[i].len1, p->chunks[i].off2, p->chunks[i].len2);
+    }
+}
+
+
+// p is a patch from commit to other.
+void fill_line_map(struct commit* commit, struct commit* other, struct patch* p)
+{
+    int num_lines = ((struct util_info*) commit->object.util)->num_lines;
+    int* line_map = ((struct util_info*) commit->object.util)->line_map;
+    int num_lines2 = ((struct util_info*) other->object.util)->num_lines;
+    int* line_map2 = ((struct util_info*) other->object.util)->line_map;
+    int cur_chunk = 0;
+    int i1, i2;
+
+    if(p->num && DEBUG)
+        print_patch(p);
+
+    for(i1 = 0; i1 < num_lines; i1++)
+        line_map[i1] = -1;
+
+    if(DEBUG)
+        printf("num lines 1: %d num lines 2: %d\n", num_lines, num_lines2);
+
+    for(i1 = 0, i2 = 0; i1 < num_lines; i1++, i2++) {
+        if(DEBUG > 1)
+            printf("%d %d\n", i1, i2);
+
+        if(i2 >= num_lines2)
+            break;
+
+        line_map[i1] = line_map2[i2];
+
+        struct chunk* chunk = NULL;
+        if(cur_chunk < p->num)
+            chunk = &p->chunks[cur_chunk];
+
+        if(chunk && chunk->off1 == i1) {
+            i2 = chunk->off2;
+
+            if(chunk->len1 > 0)
+                i1 += chunk->len1-1;
+            if(chunk->len2 > 0)
+                i2 += chunk->len2-1;
+            cur_chunk++;
+        }
+    }
+}
+
+int map_line(struct commit* commit, int line)
+{
+    struct util_info* info = commit->object.util;
+    assert(line >= 0 && line < info->num_lines);
+    return info->line_map[line];
+}
+
+int fill_util_info(struct commit* commit, const char* path)
+{
+    if(commit->object.util)
+        return 0;
+
+    struct util_info* util = xmalloc(sizeof(struct util_info));
+    util->buf = NULL;
+    util->size = 0;
+    util->num_lines = -1;
+    util->line_map = NULL;
+
+    commit->object.util = util;
+
+    if(get_blob_sha1(commit->tree, path, util->sha1))
+        return -1;
+
+    return 0;
+}
+
+void alloc_line_map(struct commit* commit)
+{
+    struct util_info* util = commit->object.util;
+
+    if(util->line_map)
+        return;
+
+    get_blob(commit);
+
+    int i;
+    util->num_lines = 0;
+    for(i = 0; i < util->size; i++) {
+        if(util->buf[i] == '\n')
+            util->num_lines++;
+    }
+    util->line_map = xmalloc(sizeof(int)*util->num_lines);
+}
+
+void copy_line_map(struct commit* dst, struct commit* src)
+{
+    struct util_info* u_dst = dst->object.util;
+    struct util_info* u_src = src->object.util;
+
+    u_dst->line_map = u_src->line_map;
+    u_dst->num_lines = u_src->num_lines;
+    u_dst->buf = u_src->buf;
+    u_dst->size = u_src->size;
+}
+
+void process_commits(struct commit_list* list, const char* path)
+{
+    int i;
+
+    while(list) {
+        struct commit* commit = pop_commit(&list);
+        struct commit_list* parents;
+        struct util_info* info;
+
+        info = commit->object.util;
+        num_commits++;
+        if(DEBUG)
+            printf("\nProcessing commit: %d %s\n", num_commits, sha1_to_hex(commit->object.sha1));
+        for(parents = commit->parents;
+            parents != NULL; parents = parents->next) {
+            struct commit* parent = parents->item;
+
+            if(parse_commit(parent) < 0)
+                die("parse_commit error");
+
+            if(DEBUG)
+                printf("parent: %s\n", sha1_to_hex(parent->object.sha1));
+
+            if(fill_util_info(parent, path))
+                continue;
+
+            // Temporarily assign everything to the parent.
+            int num_blame = 0;
+            for(i = 0; i < num_blame_lines; i++) {
+                if(blame_lines[i] == commit) {
+                    num_blame++;
+                    blame_lines[i] = parent;
+                }
+            }
+
+            if(num_blame == 0)
+                continue;
+
+            struct patch* patch = get_patch(parent, commit);
+            if(patch->num == 0) {
+                copy_line_map(parent, commit);
+            } else {
+                alloc_line_map(parent);
+                fill_line_map(parent, commit, patch);
+            }
+
+            for(i = 0; i < patch->num; i++) {
+                int l;
+                for(l = 0; l < patch->chunks[i].len2; l++) {
+                    int mapped_line = map_line(commit, patch->chunks[i].off2 + l);
+                    if(mapped_line != -1 && blame_lines[mapped_line] == parent)
+                        blame_lines[mapped_line] = commit;
+                }
+            }
+            free_patch(patch);
+        }
+    }
+}
+
+#define SEEN 1
+struct commit_list* get_commit_list(struct commit* commit, const char* pathname)
+{
+    struct commit_list* ret = NULL;
+    struct commit_list* process = NULL;
+    unsigned char sha1[20];
+
+    commit_list_insert(commit, &process);
+
+    while(process) {
+        struct commit* com = pop_commit(&process);
+        if(com->object.flags & SEEN)
+            continue;
+
+        com->object.flags |= SEEN;
+        commit_list_insert(com, &ret);
+        struct commit_list* parents;
+
+        parse_commit(com);
+
+        for(parents = com->parents;
+            parents != NULL; parents = parents->next) {
+            struct commit* parent = parents->item;
+
+            parse_commit(parent);
+
+            if(!get_blob_sha1(parent->tree, pathname, sha1))
+                commit_list_insert(parent, &process);
+        }
+    }
+
+    return ret;
+}
+
+int main(int argc, const char **argv)
+{
+    unsigned char sha1[20];
+    struct commit *commit;
+    const char* filename;
+    int i;
+
+    setup_git_directory();
+
+    if (argc != 3)
+        die("Usage: blame commit-ish file");
+
+    if (get_sha1(argv[1], sha1))
+        die("get_sha1 failed");
+
+    commit = lookup_commit_reference(sha1);
+
+    filename = argv[2];
+
+    struct commit_list* list = get_commit_list(commit, filename);
+    sort_in_topological_order(&list, 1);
+
+    if(fill_util_info(commit, filename)) {
+        printf("%s not found in %s\n", filename, argv[1]);
+        return 0;
+    }
+    alloc_line_map(commit);
+
+    struct util_info* util = commit->object.util;
+    num_blame_lines = util->num_lines;
+    blame_lines = xmalloc(sizeof(struct commit*)*num_blame_lines);
+
+
+    for(i = 0; i < num_blame_lines; i++) {
+        blame_lines[i] = commit;
+
+        ((struct util_info*) commit->object.util)->line_map[i] = i;
+    }
+
+    process_commits(list, filename);
+
+    for(i = 0; i < num_blame_lines; i++) {
+        printf("%d %s\n", i+1-1, sha1_to_hex(blame_lines[i]->object.sha1));
+//        printf("%d %s\n", i+1-1, find_unique_abbrev(blame_lines[i]->object.sha1, 6));
+    }
+
+    if(DEBUG) {
+        printf("num get patch: %d\n", num_get_patch);
+        printf("num commits: %d\n", num_commits);
+    }
+
+    return 0;
+}
index 058a2aa..3ee3a0c 100644 (file)
@@ -3,11 +3,74 @@
  * The delta-parsing part is almost straight copy of patch-delta.c
  * which is (C) 2005 Nicolas Pitre <nico@cam.org>.
  */
+#include "cache.h"
+#include "delta.h"
+#include "count-delta.h"
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
-#include "delta.h"
-#include "count-delta.h"
+
+struct span {
+       struct span *next;
+       unsigned long ofs;
+       unsigned long end;
+};
+
+static void touch_range(struct span **span,
+                       unsigned long ofs, unsigned long end)
+{
+       struct span *e = *span;
+       struct span *p = NULL;
+
+       while (e && e->ofs <= ofs) {
+               again:
+               if (ofs < e->end) {
+                       while (e->end < end) {
+                               if (e->next && e->next->ofs <= end) {
+                                       e->end = e->next->ofs;
+                                       e = e->next;
+                               }
+                               else {
+                                       e->end = end;
+                                       return;
+                               }
+                       }
+                       return;
+               }
+               p = e;
+               e = e->next;
+       }
+       if (e && e->ofs <= end) {
+               e->ofs = ofs;
+               goto again;
+       }
+       else {
+               e = xmalloc(sizeof(*e));
+               e->ofs = ofs;
+               e->end = end;
+               if (p) {
+                       e->next = p->next;
+                       p->next = e;
+               }
+               else {
+                       e->next = *span;
+                       *span = e;
+               }
+       }
+}
+
+static unsigned long count_range(struct span *s)
+{
+       struct span *t;
+       unsigned long sz = 0;
+       while (s) {
+               t = s;
+               sz += s->end - s->ofs;
+               s = s->next;
+               free(t);
+       }
+       return sz;
+}
 
 /*
  * NOTE.  We do not _interpret_ delta fully.  As an approximation, we
 int count_delta(void *delta_buf, unsigned long delta_size,
                unsigned long *src_copied, unsigned long *literal_added)
 {
-       unsigned long copied_from_source, added_literal;
+       unsigned long added_literal;
        const unsigned char *data, *top;
        unsigned char cmd;
        unsigned long src_size, dst_size, out;
+       struct span *span = NULL;
 
        if (delta_size < DELTA_SIZE_MIN)
                return -1;
@@ -35,7 +99,7 @@ int count_delta(void *delta_buf, unsigned long delta_size,
        src_size = get_delta_hdr_size(&data);
        dst_size = get_delta_hdr_size(&data);
 
-       added_literal = copied_from_source = out = 0;
+       added_literal = out = 0;
        while (data < top) {
                cmd = *data++;
                if (cmd & 0x80) {
@@ -49,7 +113,7 @@ int count_delta(void *delta_buf, unsigned long delta_size,
                        if (cmd & 0x40) cp_size |= (*data++ << 16);
                        if (cp_size == 0) cp_size = 0x10000;
 
-                       copied_from_source += cp_size;
+                       touch_range(&span, cp_off, cp_off+cp_size);
                        out += cp_size;
                } else {
                        /* write literal into dst */
@@ -59,6 +123,8 @@ int count_delta(void *delta_buf, unsigned long delta_size,
                }
        }
 
+       *src_copied = count_range(span);
+
        /* sanity check */
        if (data != top || out != dst_size)
                return -1;
@@ -66,7 +132,6 @@ int count_delta(void *delta_buf, unsigned long delta_size,
        /* delete size is what was _not_ copied from source.
         * edit size is that and literal additions.
         */
-       *src_copied = copied_from_source;
        *literal_added = added_literal;
        return 0;
 }
index c2f656a..2ed5984 100644 (file)
@@ -19,8 +19,9 @@
  */
 
 #include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
 #include "delta.h"
-#include "zlib.h"
 
 
 /* block size: min = 16, max = 64k, power of 2 */
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 
 #define GR_PRIME 0x9e370001
-#define HASH(v, b) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))
-       
-static unsigned int hashbits(unsigned int size)
-{
-       unsigned int val = 1, bits = 0;
-       while (val < size && bits < 32) {
-               val <<= 1;
-               bits++;
-       }
-       return bits ? bits: 1;
-}
-
-typedef struct s_chanode {
-       struct s_chanode *next;
-       int icurr;
-} chanode_t;
-
-typedef struct s_chastore {
-       int isize, nsize;
-       chanode_t *ancur;
-} chastore_t;
-
-static void cha_init(chastore_t *cha, int isize, int icount)
-{
-       cha->isize = isize;
-       cha->nsize = icount * isize;
-       cha->ancur = NULL;
-}
-
-static void *cha_alloc(chastore_t *cha)
-{
-       chanode_t *ancur;
-       void *data;
+#define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift))
 
-       ancur = cha->ancur;
-       if (!ancur || ancur->icurr == cha->nsize) {
-               ancur = malloc(sizeof(chanode_t) + cha->nsize);
-               if (!ancur)
-                       return NULL;
-               ancur->icurr = 0;
-               ancur->next = cha->ancur;
-               cha->ancur = ancur;
-       }
-
-       data = (void *)ancur + sizeof(chanode_t) + ancur->icurr;
-       ancur->icurr += cha->isize;
-       return data;
-}
-
-static void cha_free(chastore_t *cha)
-{
-       chanode_t *cur = cha->ancur;
-       while (cur) {
-               chanode_t *tmp = cur;
-               cur = cur->next;
-               free(tmp);
-       }
-}
-
-typedef struct s_bdrecord {
-       struct s_bdrecord *next;
-       unsigned int fp;
+struct index {
        const unsigned char *ptr;
-} bdrecord_t;
-
-typedef struct s_bdfile {
-       chastore_t cha;
-       unsigned int fphbits;
-       bdrecord_t **fphash;
-} bdfile_t;
+       unsigned int val;
+       struct index *next;
+};
 
-static int delta_prepare(const unsigned char *buf, int bufsize, bdfile_t *bdf)
+static struct index ** delta_index(const unsigned char *buf,
+                                  unsigned long bufsize,
+                                  unsigned int *hash_shift)
 {
-       unsigned int fphbits;
-       int i, hsize;
-       const unsigned char *data, *top;
-       bdrecord_t *brec;
-       bdrecord_t **fphash;
-
-       fphbits = hashbits(bufsize / BLK_SIZE + 1);
-       hsize = 1 << fphbits;
-       fphash = malloc(hsize * sizeof(bdrecord_t *));
-       if (!fphash)
-               return -1;
-       for (i = 0; i < hsize; i++)
-               fphash[i] = NULL;
-       cha_init(&bdf->cha, sizeof(bdrecord_t), hsize / 4 + 1);
-
-       top = buf + bufsize;
-       data = buf + (bufsize / BLK_SIZE) * BLK_SIZE;
-       if (data == top)
+       unsigned int hsize, hshift, entries, blksize, i;
+       const unsigned char *data;
+       struct index *entry, **hash;
+       void *mem;
+
+       /* determine index hash size */
+       entries = (bufsize + BLK_SIZE - 1) / BLK_SIZE;
+       hsize = entries / 4;
+       for (i = 4; (1 << i) < hsize && i < 16; i++);
+       hsize = 1 << i;
+       hshift = 32 - i;
+       *hash_shift = hshift;
+
+       /* allocate lookup index */
+       mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry));
+       if (!mem)
+               return NULL;
+       hash = mem;
+       entry = mem + hsize * sizeof(*hash);
+       memset(hash, 0, hsize * sizeof(*hash));
+
+       /* then populate it */
+       data = buf + entries * BLK_SIZE - BLK_SIZE;
+       blksize = bufsize - (data - buf);
+       while (data >= buf) {
+               unsigned int val = adler32(0, data, blksize);
+               i = HASH(val, hshift);
+               entry->ptr = data;
+               entry->val = val;
+               entry->next = hash[i];
+               hash[i] = entry++;
+               blksize = BLK_SIZE;
                data -= BLK_SIZE;
+       }
 
-       for ( ; data >= buf; data -= BLK_SIZE) {
-               brec = cha_alloc(&bdf->cha);
-               if (!brec) {
-                       cha_free(&bdf->cha);
-                       free(fphash);
-                       return -1;
-               }
-               brec->fp = adler32(0, data, MIN(BLK_SIZE, top - data));
-               brec->ptr = data;
-               i = HASH(brec->fp, fphbits);
-               brec->next = fphash[i];
-               fphash[i] = brec;
-       }
-
-       bdf->fphbits = fphbits;
-       bdf->fphash = fphash;
-
-       return 0;
-}
-
-static void delta_cleanup(bdfile_t *bdf)
-{
-       free(bdf->fphash);
-       cha_free(&bdf->cha);
+       return hash;
 }
 
+/* provide the size of the copy opcode given the block offset and size */
 #define COPYOP_SIZE(o, s) \
     (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
      !!(s & 0xff) + !!(s & 0xff00) + 1)
 
+/* the maximum size for any opcode */
+#define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff)
+
 void *diff_delta(void *from_buf, unsigned long from_size,
                 void *to_buf, unsigned long to_size,
                 unsigned long *delta_size,
                 unsigned long max_size)
 {
-       int i, outpos, outsize, inscnt, csize, msize, moff;
-       unsigned int fp;
-       const unsigned char *ref_data, *ref_top, *data, *top, *ptr1, *ptr2;
-       unsigned char *out, *orig;
-       bdrecord_t *brec;
-       bdfile_t bdf;
+       unsigned int i, outpos, outsize, inscnt, hash_shift;
+       const unsigned char *ref_data, *ref_top, *data, *top;
+       unsigned char *out;
+       struct index *entry, **hash;
 
-       if (!from_size || !to_size || delta_prepare(from_buf, from_size, &bdf))
+       if (!from_size || !to_size)
+               return NULL;
+       hash = delta_index(from_buf, from_size, &hash_shift);
+       if (!hash)
                return NULL;
-       
+
        outpos = 0;
        outsize = 8192;
+       if (max_size && outsize >= max_size)
+               outsize = max_size + MAX_OP_SIZE + 1;
        out = malloc(outsize);
        if (!out) {
-               delta_cleanup(&bdf);
+               free(hash);
                return NULL;
        }
 
@@ -199,28 +138,32 @@ void *diff_delta(void *from_buf, unsigned long from_size,
        }
 
        inscnt = 0;
-       moff = 0;
-       while (data < top) {
-               msize = 0;
-               fp = adler32(0, data, MIN(top - data, BLK_SIZE));
-               i = HASH(fp, bdf.fphbits);
-               for (brec = bdf.fphash[i]; brec; brec = brec->next) {
-                       if (brec->fp == fp) {
-                               csize = ref_top - brec->ptr;
-                               if (csize > top - data)
-                                       csize = top - data;
-                               for (ptr1 = brec->ptr, ptr2 = data; 
-                                    csize && *ptr1 == *ptr2;
-                                    csize--, ptr1++, ptr2++);
 
-                               csize = ptr1 - brec->ptr;
-                               if (csize > msize) {
-                                       moff = brec->ptr - ref_data;
-                                       msize = csize;
-                                       if (msize >= 0x10000) {
-                                               msize = 0x10000;
-                                               break;
-                                       }
+       while (data < top) {
+               unsigned int moff = 0, msize = 0;
+               unsigned int blksize = MIN(top - data, BLK_SIZE);
+               unsigned int val = adler32(0, data, blksize);
+               i = HASH(val, hash_shift);
+               for (entry = hash[i]; entry; entry = entry->next) {
+                       const unsigned char *ref = entry->ptr;
+                       const unsigned char *src = data;
+                       unsigned int ref_size = ref_top - ref;
+                       if (entry->val != val)
+                               continue;
+                       if (ref_size > top - src)
+                               ref_size = top - src;
+                       while (ref_size && *src++ == *ref) {
+                               ref++;
+                               ref_size--;
+                       }
+                       ref_size = ref - entry->ptr;
+                       if (ref_size > msize) {
+                               /* this is our best match so far */
+                               moff = entry->ptr - ref_data;
+                               msize = ref_size;
+                               if (msize >= 0x10000) {
+                                       msize = 0x10000;
+                                       break;
                                }
                        }
                }
@@ -235,13 +178,15 @@ void *diff_delta(void *from_buf, unsigned long from_size,
                                inscnt = 0;
                        }
                } else {
+                       unsigned char *op;
+
                        if (inscnt) {
                                out[outpos - inscnt - 1] = inscnt;
                                inscnt = 0;
                        }
 
                        data += msize;
-                       orig = out + outpos++;
+                       op = out + outpos++;
                        i = 0x80;
 
                        if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
@@ -256,23 +201,21 @@ void *diff_delta(void *from_buf, unsigned long from_size,
                        msize >>= 8;
                        if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
 
-                       *orig = i;
-               }
-
-               if (max_size && outpos > max_size) {
-                       free(out);
-                       delta_cleanup(&bdf);
-                       return NULL;
+                       *op = i;
                }
 
-               /* next time around the largest possible output is 1 + 4 + 3 */
-               if (outpos > outsize - 8) {
+               if (outpos >= outsize - MAX_OP_SIZE) {
                        void *tmp = out;
                        outsize = outsize * 3 / 2;
-                       out = realloc(out, outsize);
+                       if (max_size && outsize >= max_size)
+                               outsize = max_size + MAX_OP_SIZE + 1;
+                       if (max_size && outpos > max_size)
+                               out = NULL;
+                       else
+                               out = realloc(out, outsize);
                        if (!out) {
                                free(tmp);
-                               delta_cleanup(&bdf);
+                               free(hash);
                                return NULL;
                        }
                }
@@ -281,7 +224,7 @@ void *diff_delta(void *from_buf, unsigned long from_size,
        if (inscnt)
                out[outpos - inscnt - 1] = inscnt;
 
-       delta_cleanup(&bdf);
+       free(hash);
        *delta_size = outpos;
        return out;
 }
index 12cd816..91d6c63 100644 (file)
@@ -18,7 +18,7 @@
 #define MAX_SCORE 60000.0
 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
 #define DEFAULT_BREAK_SCORE  30000 /* minimum for break to happen (50%)*/
-#define DEFAULT_MERGE_SCORE  48000 /* maximum for break-merge to happen (80%)*/
+#define DEFAULT_MERGE_SCORE  45000 /* maximum for break-merge to happen (75%)*/
 
 #define MINIMUM_BREAK_SIZE     400 /* do not break a file smaller than this */
 
index aa6f42a..09738fe 100644 (file)
@@ -8,7 +8,7 @@ static int keep_pack;
 static int quiet;
 static int verbose;
 static const char fetch_pack_usage[] =
-"git-fetch-pack [-q] [-v] [-k] [--exec=upload-pack] [host:]directory <refs>...";
+"git-fetch-pack [-q] [-v] [-k] [--thin] [--exec=upload-pack] [host:]directory <refs>...";
 static const char *exec = "git-upload-pack";
 
 #define COMPLETE       (1U << 0)
@@ -18,7 +18,7 @@ static const char *exec = "git-upload-pack";
 #define POPPED         (1U << 4)
 
 static struct commit_list *rev_list = NULL;
-static int non_common_revs = 0, multi_ack = 0;
+static int non_common_revs = 0, multi_ack = 0, use_thin_pack = 0;
 
 static void rev_list_push(struct commit *commit, int mark)
 {
@@ -156,8 +156,9 @@ static int find_common(int fd[2], unsigned char *result_sha1,
                        continue;
                }
 
-               packet_write(fd[1], "want %s%s\n", sha1_to_hex(remote),
-                       multi_ack ? " multi_ack" : "");
+               packet_write(fd[1], "want %s%s%s\n", sha1_to_hex(remote),
+                            (multi_ack ? " multi_ack" : ""),
+                            (use_thin_pack ? " thin-pack" : ""));
                fetching++;
        }
        packet_flush(fd[1]);
@@ -421,6 +422,10 @@ int main(int argc, char **argv)
                                keep_pack = 1;
                                continue;
                        }
+                       if (!strcmp("--thin", arg)) {
+                               use_thin_pack = 1;
+                               continue;
+                       }
                        if (!strcmp("-v", arg)) {
                                verbose = 1;
                                continue;
@@ -434,6 +439,8 @@ int main(int argc, char **argv)
        }
        if (!dest)
                usage(fetch_pack_usage);
+       if (keep_pack)
+               use_thin_pack = 0;
        pid = git_connect(fd, dest, exec);
        if (pid < 0)
                return 1;
index fcc24f8..de4f011 100755 (executable)
@@ -320,7 +320,7 @@ fetch_main () {
     ( : subshell because we muck with IFS
       IFS="    $LF"
       (
-         git-fetch-pack $exec $keep "$remote" $rref || echo failed "$remote"
+         git-fetch-pack $exec $keep --thin "$remote" $rref || echo failed "$remote"
       ) |
       while read sha1 remote_name
       do
index 706db99..73dcf06 100755 (executable)
@@ -8,6 +8,7 @@ USAGE='[--all] [--tags] [--force] <repository> [<refspec>...]'
 has_all=
 has_force=
 has_exec=
+has_thin=
 remote=
 do_tags=
 
@@ -22,6 +23,8 @@ do
                has_force=--force ;;
        --exec=*)
                has_exec="$1" ;;
+       --thin)
+               has_thin="$1" ;;
        -*)
                 usage ;;
         *)
@@ -72,6 +75,7 @@ set x "$remote" "$@"; shift
 test "$has_all" && set x "$has_all" "$@" && shift
 test "$has_force" && set x "$has_force" "$@" && shift
 test "$has_exec" && set x "$has_exec" "$@" && shift
+test "$has_thin" && set x "$has_thin" "$@" && shift
 
 case "$remote" in
 http://* | https://*)
index 8f352aa..be7a200 100644 (file)
@@ -3,6 +3,7 @@
 #include "delta.h"
 #include "pack.h"
 #include "csum-file.h"
+#include "diff.h"
 #include <sys/time.h>
 #include <signal.h>
 
@@ -27,6 +28,13 @@ struct object_entry {
        struct object_entry *delta_sibling; /* other deltified objects who
                                             * uses the same base as me
                                             */
+       int preferred_base;     /* we do not pack this, but is encouraged to
+                                * be used as the base objectto delta huge
+                                * objects against.
+                                */
+       int based_on_preferred; /* current delta candidate is a preferred
+                                * one, or delta against a preferred one.
+                                */
 };
 
 /*
@@ -49,7 +57,7 @@ static int local = 0;
 static int incremental = 0;
 static struct object_entry **sorted_by_sha, **sorted_by_type;
 static struct object_entry *objects = NULL;
-static int nr_objects = 0, nr_alloc = 0;
+static int nr_objects = 0, nr_alloc = 0, nr_result = 0;
 static const char *base_name;
 static unsigned char pack_file_sha1[20];
 static int progress = 1;
@@ -231,7 +239,8 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
        return n;
 }
 
-static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
+static unsigned long write_object(struct sha1file *f,
+                                 struct object_entry *entry)
 {
        unsigned long size;
        char type[10];
@@ -241,6 +250,9 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
        enum object_type obj_type;
        int to_reuse = 0;
 
+       if (entry->preferred_base)
+               return 0;
+
        obj_type = entry->type;
        if (! entry->in_pack)
                to_reuse = 0;   /* can't reuse what we don't have */
@@ -335,20 +347,22 @@ static void write_pack_file(void)
                do_progress = progress;
        }
        if (do_progress)
-               fprintf(stderr, "Writing %d objects.\n", nr_objects);
+               fprintf(stderr, "Writing %d objects.\n", nr_result);
 
        hdr.hdr_signature = htonl(PACK_SIGNATURE);
        hdr.hdr_version = htonl(PACK_VERSION);
-       hdr.hdr_entries = htonl(nr_objects);
+       hdr.hdr_entries = htonl(nr_result);
        sha1write(f, &hdr, sizeof(hdr));
        offset = sizeof(hdr);
+       if (!nr_result)
+               goto done;
        for (i = 0; i < nr_objects; i++) {
                offset = write_one(f, objects + i, offset);
                if (do_progress) {
-                       unsigned percent = written * 100 / nr_objects;
+                       unsigned percent = written * 100 / nr_result;
                        if (progress_update || percent != last_percent) {
                                fprintf(stderr, "%4u%% (%u/%u) done\r",
-                                       percent, written, nr_objects);
+                                       percent, written, nr_result);
                                progress_update = 0;
                                last_percent = percent;
                        }
@@ -356,16 +370,17 @@ static void write_pack_file(void)
        }
        if (do_progress)
                fputc('\n', stderr);
-
+ done:
        sha1close(f, pack_file_sha1, 1);
 }
 
 static void write_index_file(void)
 {
        int i;
-       struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx");
+       struct sha1file *f = sha1create("%s-%s.%s", base_name,
+                                       sha1_to_hex(object_list_sha1), "idx");
        struct object_entry **list = sorted_by_sha;
-       struct object_entry **last = list + nr_objects;
+       struct object_entry **last = list + nr_result;
        unsigned int array[256];
 
        /*
@@ -390,7 +405,7 @@ static void write_index_file(void)
         * Write the actual SHA1 entries..
         */
        list = sorted_by_sha;
-       for (i = 0; i < nr_objects; i++) {
+       for (i = 0; i < nr_result; i++) {
                struct object_entry *entry = *list++;
                unsigned int offset = htonl(entry->offset);
                sha1write(f, &offset, 4);
@@ -400,27 +415,139 @@ static void write_index_file(void)
        sha1close(f, NULL, 1);
 }
 
-static int add_object_entry(unsigned char *sha1, unsigned int hash)
+static int locate_object_entry_hash(const unsigned char *sha1)
+{
+       int i;
+       unsigned int ui;
+       memcpy(&ui, sha1, sizeof(unsigned int));
+       i = ui % object_ix_hashsz;
+       while (0 < object_ix[i]) {
+               if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
+                       return i;
+               if (++i == object_ix_hashsz)
+                       i = 0;
+       }
+       return -1 - i;
+}
+
+static struct object_entry *locate_object_entry(const unsigned char *sha1)
+{
+       int i;
+
+       if (!object_ix_hashsz)
+               return NULL;
+
+       i = locate_object_entry_hash(sha1);
+       if (0 <= i)
+               return &objects[object_ix[i]-1];
+       return NULL;
+}
+
+static void rehash_objects(void)
+{
+       int i;
+       struct object_entry *oe;
+
+       object_ix_hashsz = nr_objects * 3;
+       if (object_ix_hashsz < 1024)
+               object_ix_hashsz = 1024;
+       object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
+       object_ix = memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
+       for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
+               int ix = locate_object_entry_hash(oe->sha1);
+               if (0 <= ix)
+                       continue;
+               ix = -1 - ix;
+               object_ix[ix] = i + 1;
+       }
+}
+
+struct name_path {
+       struct name_path *up;
+       const char *elem;
+       int len;
+};
+
+#define DIRBITS 12
+
+static unsigned name_hash(struct name_path *path, const char *name)
+{
+       struct name_path *p = path;
+       const char *n = name + strlen(name);
+       unsigned hash = 0, name_hash = 0, name_done = 0;
+
+       if (n != name && n[-1] == '\n')
+               n--;
+       while (name <= --n) {
+               unsigned char c = *n;
+               if (c == '/' && !name_done) {
+                       name_hash = hash;
+                       name_done = 1;
+                       hash = 0;
+               }
+               hash = hash * 11 + c;
+       }
+       if (!name_done) {
+               name_hash = hash;
+               hash = 0;
+       }
+       for (p = path; p; p = p->up) {
+               hash = hash * 11 + '/';
+               n = p->elem + p->len;
+               while (p->elem <= --n) {
+                       unsigned char c = *n;
+                       hash = hash * 11 + c;
+               }
+       }
+       /*
+        * Make sure "Makefile" and "t/Makefile" are hashed separately
+        * but close enough.
+        */
+       hash = (name_hash<<DIRBITS) | (hash & ((1U<<DIRBITS )-1));
+
+       if (0) { /* debug */
+               n = name + strlen(name);
+               if (n != name && n[-1] == '\n')
+                       n--;
+               while (name <= --n)
+                       fputc(*n, stderr);
+               for (p = path; p; p = p->up) {
+                       fputc('/', stderr);
+                       n = p->elem + p->len;
+                       while (p->elem <= --n)
+                               fputc(*n, stderr);
+               }
+               fprintf(stderr, "\t%08x\n", hash);
+       }
+       return hash;
+}
+
+static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude)
 {
        unsigned int idx = nr_objects;
        struct object_entry *entry;
        struct packed_git *p;
        unsigned int found_offset = 0;
        struct packed_git *found_pack = NULL;
-
-       for (p = packed_git; p; p = p->next) {
-               struct pack_entry e;
-               if (find_pack_entry_one(sha1, &e, p)) {
-                       if (incremental)
-                               return 0;
-                       if (local && !p->pack_local)
-                               return 0;
-                       if (!found_pack) {
-                               found_offset = e.offset;
-                               found_pack = e.p;
+       int ix, status = 0;
+
+       if (!exclude) {
+               for (p = packed_git; p; p = p->next) {
+                       struct pack_entry e;
+                       if (find_pack_entry_one(sha1, &e, p)) {
+                               if (incremental)
+                                       return 0;
+                               if (local && !p->pack_local)
+                                       return 0;
+                               if (!found_pack) {
+                                       found_offset = e.offset;
+                                       found_pack = e.p;
+                               }
                        }
                }
        }
+       if ((entry = locate_object_entry(sha1)) != NULL)
+               goto already_added;
 
        if (idx >= nr_alloc) {
                unsigned int needed = (idx + 1024) * 3 / 2;
@@ -428,45 +555,94 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash)
                nr_alloc = needed;
        }
        entry = objects + idx;
+       nr_objects = idx + 1;
        memset(entry, 0, sizeof(*entry));
        memcpy(entry->sha1, sha1, 20);
        entry->hash = hash;
-       if (found_pack) {
-               entry->in_pack = found_pack;
-               entry->in_pack_offset = found_offset;
+
+       if (object_ix_hashsz * 3 <= nr_objects * 4)
+               rehash_objects();
+       else {
+               ix = locate_object_entry_hash(entry->sha1);
+               if (0 <= ix)
+                       die("internal error in object hashing.");
+               object_ix[-1 - ix] = idx + 1;
        }
-       nr_objects = idx+1;
-       return 1;
+       status = 1;
+
+ already_added:
+       if (progress_update) {
+               fprintf(stderr, "Counting objects...%d\r", nr_objects);
+               progress_update = 0;
+       }
+       if (exclude)
+               entry->preferred_base = 1;
+       else {
+               if (found_pack) {
+                       entry->in_pack = found_pack;
+                       entry->in_pack_offset = found_offset;
+               }
+       }
+       return status;
 }
 
-static int locate_object_entry_hash(unsigned char *sha1)
+static void add_pbase_tree(struct tree_desc *tree, struct name_path *up)
 {
-       int i;
-       unsigned int ui;
-       memcpy(&ui, sha1, sizeof(unsigned int));
-       i = ui % object_ix_hashsz;
-       while (0 < object_ix[i]) {
-               if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
-                       return i;
-               if (++i == object_ix_hashsz)
-                       i = 0;
+       while (tree->size) {
+               const unsigned char *sha1;
+               const char *name;
+               unsigned mode, hash;
+               unsigned long size;
+               char type[20];
+
+               sha1 = tree_entry_extract(tree, &name, &mode);
+               update_tree_entry(tree);
+               if (!has_sha1_file(sha1))
+                       continue;
+               if (sha1_object_info(sha1, type, &size))
+                       continue;
+
+               hash = name_hash(up, name);
+               if (!add_object_entry(sha1, hash, 1))
+                       continue;
+
+               if (!strcmp(type, "tree")) {
+                       struct tree_desc sub;
+                       void *elem;
+                       struct name_path me;
+
+                       elem = read_sha1_file(sha1, type, &sub.size);
+                       sub.buf = elem;
+                       if (sub.buf) {
+                               me.up = up;
+                               me.elem = name;
+                               me.len = strlen(name);
+                               add_pbase_tree(&sub, &me);
+                               free(elem);
+                       }
+               }
        }
-       return -1 - i;
 }
 
-static struct object_entry *locate_object_entry(unsigned char *sha1)
+static void add_preferred_base(unsigned char *sha1)
 {
-       int i = locate_object_entry_hash(sha1);
-       if (0 <= i)
-               return &objects[object_ix[i]-1];
-       return NULL;
+       struct tree_desc tree;
+       void *elem;
+
+       elem = read_object_with_reference(sha1, "tree", &tree.size, NULL);
+       tree.buf = elem;
+       if (!tree.buf)
+               return;
+       if (add_object_entry(sha1, name_hash(NULL, ""), 1))
+               add_pbase_tree(&tree, NULL);
+       free(elem);
 }
 
 static void check_object(struct object_entry *entry)
 {
        char type[20];
 
-       if (entry->in_pack) {
+       if (entry->in_pack && !entry->preferred_base) {
                unsigned char base[20];
                unsigned long size;
                struct object_entry *base_entry;
@@ -485,7 +661,8 @@ static void check_object(struct object_entry *entry)
                 */
                if (!no_reuse_delta &&
                    entry->in_pack_type == OBJ_DELTA &&
-                   (base_entry = locate_object_entry(base))) {
+                   (base_entry = locate_object_entry(base)) &&
+                   (!base_entry->preferred_base)) {
 
                        /* Depth value does not matter - find_deltas()
                         * will never consider reused delta as the
@@ -523,25 +700,6 @@ static void check_object(struct object_entry *entry)
                    sha1_to_hex(entry->sha1), type);
 }
 
-static void hash_objects(void)
-{
-       int i;
-       struct object_entry *oe;
-
-       object_ix_hashsz = nr_objects * 2;
-       object_ix = xcalloc(sizeof(int), object_ix_hashsz);
-       for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
-               int ix = locate_object_entry_hash(oe->sha1);
-               if (0 <= ix) {
-                       error("the same object '%s' added twice",
-                             sha1_to_hex(oe->sha1));
-                       continue;
-               }
-               ix = -1 - ix;
-               object_ix[ix] = i + 1;
-       }
-}
-
 static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
 {
        struct object_entry *child = me->delta_child;
@@ -560,14 +718,26 @@ static void get_object_details(void)
        int i;
        struct object_entry *entry;
 
-       hash_objects();
        prepare_pack_ix();
        for (i = 0, entry = objects; i < nr_objects; i++, entry++)
                check_object(entry);
-       for (i = 0, entry = objects; i < nr_objects; i++, entry++)
-               if (!entry->delta && entry->delta_child)
-                       entry->delta_limit =
-                               check_delta_limit(entry, 1);
+
+       if (nr_objects == nr_result) {
+               /*
+                * Depth of objects that depend on the entry -- this
+                * is subtracted from depth-max to break too deep
+                * delta chain because of delta data reusing.
+                * However, we loosen this restriction when we know we
+                * are creating a thin pack -- it will have to be
+                * expanded on the other end anyway, so do not
+                * artificially cut the delta chain and let it go as
+                * deep as it wants.
+                */
+               for (i = 0, entry = objects; i < nr_objects; i++, entry++)
+                       if (!entry->delta && entry->delta_child)
+                               entry->delta_limit =
+                                       check_delta_limit(entry, 1);
+       }
 }
 
 typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
@@ -598,6 +768,24 @@ static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
        return memcmp(a->sha1, b->sha1, 20);
 }
 
+static struct object_entry **create_final_object_list()
+{
+       struct object_entry **list;
+       int i, j;
+
+       for (i = nr_result = 0; i < nr_objects; i++)
+               if (!objects[i].preferred_base)
+                       nr_result++;
+       list = xmalloc(nr_result * sizeof(struct object_entry *));
+       for (i = j = 0; i < nr_objects; i++) {
+               if (!objects[i].preferred_base)
+                       list[j++] = objects + i;
+       }
+       current_sort = sha1_sort;
+       qsort(list, nr_result, sizeof(struct object_entry *), sort_comparator);
+       return list;
+}
+
 static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
 {
        if (a->type < b->type)
@@ -608,6 +796,10 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
                return -1;
        if (a->hash > b->hash)
                return 1;
+       if (a->preferred_base < b->preferred_base)
+               return -1;
+       if (a->preferred_base > b->preferred_base)
+               return 1;
        if (a->size < b->size)
                return -1;
        if (a->size > b->size)
@@ -632,6 +824,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 {
        struct object_entry *cur_entry = cur->entry;
        struct object_entry *old_entry = old->entry;
+       int old_preferred = (old_entry->preferred_base ||
+                            old_entry->based_on_preferred);
        unsigned long size, oldsize, delta_size, sizediff;
        long max_size;
        void *delta_buf;
@@ -640,9 +834,15 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
        if (cur_entry->type != old_entry->type)
                return -1;
 
-       /* If the current object is at edge, take the depth the objects
-        * that depend on the current object into account -- otherwise
-        * they would become too deep.
+       /* We do not compute delta to *create* objects we are not
+        * going to pack.
+        */
+       if (cur_entry->preferred_base)
+               return -1;
+
+       /* If the current object is at pack edge, take the depth the
+        * objects that depend on the current object into account --
+        * otherwise they would become too deep.
         */
        if (cur_entry->delta_child) {
                if (max_depth <= cur_entry->delta_limit)
@@ -651,11 +851,10 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
        }
 
        size = cur_entry->size;
-       if (size < 50)
-               return -1;
        oldsize = old_entry->size;
        sizediff = oldsize > size ? oldsize - size : size - oldsize;
-       if (sizediff > size / 8)
+
+       if (size < 50)
                return -1;
        if (old_entry->depth >= max_depth)
                return 0;
@@ -668,8 +867,27 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
         * delete).
         */
        max_size = size / 2 - 20;
-       if (cur_entry->delta)
-               max_size = cur_entry->delta_size-1;
+       if (cur_entry->delta) {
+               if (cur_entry->based_on_preferred) {
+                       if (old_preferred)
+                               max_size = cur_entry->delta_size-1;
+                       else
+                               /* trying with non-preferred one when we
+                                * already have a delta based on preferred
+                                * one is pointless.
+                                */
+                               return -1;
+               }
+               else if (!old_preferred)
+                       max_size = cur_entry->delta_size-1;
+               else
+                       /* otherwise...  even if delta with a
+                        * preferred one produces a bigger result than
+                        * what we currently have, which is based on a
+                        * non-preferred one, it is OK.
+                        */
+                       ;
+       }
        if (sizediff >= max_size)
                return -1;
        delta_buf = diff_delta(old->data, oldsize,
@@ -679,6 +897,7 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
        cur_entry->delta = old_entry;
        cur_entry->delta_size = delta_size;
        cur_entry->depth = old_entry->depth + 1;
+       cur_entry->based_on_preferred = old_preferred;
        free(delta_buf);
        return 0;
 }
@@ -701,7 +920,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
        i = nr_objects;
        idx = 0;
        if (progress)
-               fprintf(stderr, "Deltifying %d objects.\n", nr_objects);
+               fprintf(stderr, "Deltifying %d objects.\n", nr_result);
 
        while (--i >= 0) {
                struct object_entry *entry = list[i];
@@ -710,12 +929,14 @@ static void find_deltas(struct object_entry **list, int window, int depth)
                char type[10];
                int j;
 
-               processed++;
+               if (!entry->preferred_base)
+                       processed++;
+
                if (progress) {
-                       unsigned percent = processed * 100 / nr_objects;
+                       unsigned percent = processed * 100 / nr_result;
                        if (percent != last_percent || progress_update) {
                                fprintf(stderr, "%4u%% (%u/%u) done\r",
-                                       percent, processed, nr_objects);
+                                       percent, processed, nr_result);
                                progress_update = 0;
                                last_percent = percent;
                        }
@@ -895,44 +1116,40 @@ int main(int argc, char **argv)
        }
 
        while (fgets(line, sizeof(line), stdin) != NULL) {
-               unsigned int hash;
-               char *p;
                unsigned char sha1[20];
 
-               if (progress_update) {
-                       fprintf(stderr, "Counting objects...%d\r", nr_objects);
-                       progress_update = 0;
+               if (line[0] == '-') {
+                       if (get_sha1_hex(line+1, sha1))
+                               die("expected edge sha1, got garbage:\n %s",
+                                   line+1);
+                       add_preferred_base(sha1);
+                       continue;
                }
                if (get_sha1_hex(line, sha1))
                        die("expected sha1, got garbage:\n %s", line);
-               hash = 0;
-               p = line+40;
-               while (*p) {
-                       unsigned char c = *p++;
-                       if (isspace(c))
-                               continue;
-                       hash = hash * 11 + c;
-               }
-               add_object_entry(sha1, hash);
+               add_object_entry(sha1, name_hash(NULL, line+41), 0);
        }
        if (progress)
                fprintf(stderr, "Done counting %d objects.\n", nr_objects);
-       if (non_empty && !nr_objects)
+       sorted_by_sha = create_final_object_list();
+       if (non_empty && !nr_result)
                return 0;
 
-       sorted_by_sha = create_sorted_list(sha1_sort);
        SHA1_Init(&ctx);
        list = sorted_by_sha;
-       for (i = 0; i < nr_objects; i++) {
+       for (i = 0; i < nr_result; i++) {
                struct object_entry *entry = *list++;
                SHA1_Update(&ctx, entry->sha1, 20);
        }
        SHA1_Final(object_list_sha1, &ctx);
+       if (progress && (nr_objects != nr_result))
+               fprintf(stderr, "Result has %d objects.\n", nr_result);
 
        if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
                ;
        else {
-               prepare_pack(window, depth);
+               if (nr_result)
+                       prepare_pack(window, depth);
                if (progress && pack_to_stdout) {
                        /* the other end usually displays progress itself */
                        struct itimerval v = {{0,},};
@@ -948,6 +1165,6 @@ int main(int argc, char **argv)
        }
        if (progress)
                fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
-                       nr_objects, written, written_delta, reused, reused_delta);
+                       nr_result, written, written_delta, reused, reused_delta);
        return 0;
 }
index ee5f15a..67d2a48 100644 (file)
@@ -30,7 +30,7 @@ static const char rev_list_usage[] =
 "    --date-order\n"
 "  formatting output:\n"
 "    --parents\n"
-"    --objects\n"
+"    --objects | --objects-edge\n"
 "    --unpacked\n"
 "    --header | --pretty\n"
 "    --abbrev=nr | --no-abbrev\n"
@@ -44,6 +44,7 @@ static int bisect_list = 0;
 static int tag_objects = 0;
 static int tree_objects = 0;
 static int blob_objects = 0;
+static int edge_hint = 0;
 static int verbose_header = 0;
 static int abbrev = DEFAULT_ABBREV;
 static int show_parents = 0;
@@ -62,6 +63,36 @@ static int no_merges = 0;
 static const char **paths = NULL;
 static int remove_empty_trees = 0;
 
+struct name_path {
+       struct name_path *up;
+       int elem_len;
+       const char *elem;
+};
+
+static char *path_name(struct name_path *path, const char *name)
+{
+       struct name_path *p;
+       char *n, *m;
+       int nlen = strlen(name);
+       int len = nlen + 1;
+
+       for (p = path; p; p = p->up) {
+               if (p->elem_len)
+                       len += p->elem_len + 1;
+       }
+       n = xmalloc(len);
+       m = n + len - (nlen + 1);
+       strcpy(m, name);
+       for (p = path; p; p = p->up) {
+               if (p->elem_len) {
+                       m -= p->elem_len + 1;
+                       memcpy(m, p->elem, p->elem_len);
+                       m[p->elem_len] = '/';
+               }
+       }
+       return n;
+}
+
 static void show_commit(struct commit *commit)
 {
        commit->object.flags |= SHOWN;
@@ -173,17 +204,23 @@ static int process_commit(struct commit * commit)
        return CONTINUE;
 }
 
-static struct object_list **add_object(struct object *obj, struct object_list **p, const char *name)
+static struct object_list **add_object(struct object *obj,
+                                      struct object_list **p,
+                                      struct name_path *path,
+                                      const char *name)
 {
        struct object_list *entry = xmalloc(sizeof(*entry));
        entry->item = obj;
        entry->next = *p;
-       entry->name = name;
+       entry->name = path_name(path, name);
        *p = entry;
        return &entry->next;
 }
 
-static struct object_list **process_blob(struct blob *blob, struct object_list **p, const char *name)
+static struct object_list **process_blob(struct blob *blob,
+                                        struct object_list **p,
+                                        struct name_path *path,
+                                        const char *name)
 {
        struct object *obj = &blob->object;
 
@@ -192,13 +229,17 @@ static struct object_list **process_blob(struct blob *blob, struct object_list *
        if (obj->flags & (UNINTERESTING | SEEN))
                return p;
        obj->flags |= SEEN;
-       return add_object(obj, p, name);
+       return add_object(obj, p, path, name);
 }
 
-static struct object_list **process_tree(struct tree *tree, struct object_list **p, const char *name)
+static struct object_list **process_tree(struct tree *tree,
+                                        struct object_list **p,
+                                        struct name_path *path,
+                                        const char *name)
 {
        struct object *obj = &tree->object;
        struct tree_entry_list *entry;
+       struct name_path me;
 
        if (!tree_objects)
                return p;
@@ -207,15 +248,18 @@ static struct object_list **process_tree(struct tree *tree, struct object_list *
        if (parse_tree(tree) < 0)
                die("bad tree object %s", sha1_to_hex(obj->sha1));
        obj->flags |= SEEN;
-       p = add_object(obj, p, name);
+       p = add_object(obj, p, path, name);
+       me.up = path;
+       me.elem = name;
+       me.elem_len = strlen(name);
        entry = tree->entries;
        tree->entries = NULL;
        while (entry) {
                struct tree_entry_list *next = entry->next;
                if (entry->directory)
-                       p = process_tree(entry->item.tree, p, entry->name);
+                       p = process_tree(entry->item.tree, p, &me, entry->name);
                else
-                       p = process_blob(entry->item.blob, p, entry->name);
+                       p = process_blob(entry->item.blob, p, &me, entry->name);
                free(entry);
                entry = next;
        }
@@ -230,7 +274,7 @@ static void show_commit_list(struct commit_list *list)
        while (list) {
                struct commit *commit = pop_most_recent_commit(&list, SEEN);
 
-               p = process_tree(commit->tree, p, "");
+               p = process_tree(commit->tree, p, NULL, "");
                if (process_commit(commit) == STOP)
                        break;
        }
@@ -241,15 +285,15 @@ static void show_commit_list(struct commit_list *list)
                        continue;
                if (obj->type == tag_type) {
                        obj->flags |= SEEN;
-                       p = add_object(obj, p, name);
+                       p = add_object(obj, p, NULL, name);
                        continue;
                }
                if (obj->type == tree_type) {
-                       p = process_tree((struct tree *)obj, p, name);
+                       p = process_tree((struct tree *)obj, p, NULL, name);
                        continue;
                }
                if (obj->type == blob_type) {
-                       p = process_blob((struct blob *)obj, p, name);
+                       p = process_blob((struct blob *)obj, p, NULL, name);
                        continue;
                }
                die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name);
@@ -430,16 +474,32 @@ static struct commit_list *find_bisection(struct commit_list *list)
        return best;
 }
 
+static void mark_edge_parents_uninteresting(struct commit *commit)
+{
+       struct commit_list *parents;
+
+       for (parents = commit->parents; parents; parents = parents->next) {
+               struct commit *parent = parents->item;
+               if (!(parent->object.flags & UNINTERESTING))
+                       continue;
+               mark_tree_uninteresting(parent->tree);
+               if (edge_hint && !(parent->object.flags & SHOWN)) {
+                       parent->object.flags |= SHOWN;
+                       printf("-%s\n", sha1_to_hex(parent->object.sha1));
+               }
+       }
+}
+
 static void mark_edges_uninteresting(struct commit_list *list)
 {
        for ( ; list; list = list->next) {
-               struct commit_list *parents = list->item->parents;
+               struct commit *commit = list->item;
 
-               for ( ; parents; parents = parents->next) {
-                       struct commit *commit = parents->item;
-                       if (commit->object.flags & UNINTERESTING)
-                               mark_tree_uninteresting(commit->tree);
+               if (commit->object.flags & UNINTERESTING) {
+                       mark_tree_uninteresting(commit->tree);
+                       continue;
                }
+               mark_edge_parents_uninteresting(commit);
        }
 }
 
@@ -657,7 +717,7 @@ static struct commit_list *limit_list(struct commit_list *list)
 
 static void add_pending_object(struct object *obj, const char *name)
 {
-       add_object(obj, &pending_objects, name);
+       add_object(obj, &pending_objects, NULL, name);
 }
 
 static struct commit *get_commit_reference(const char *name, const unsigned char *sha1, unsigned int flags)
@@ -843,6 +903,13 @@ int main(int argc, const char **argv)
                        blob_objects = 1;
                        continue;
                }
+               if (!strcmp(arg, "--objects-edge")) {
+                       tag_objects = 1;
+                       tree_objects = 1;
+                       blob_objects = 1;
+                       edge_hint = 1;
+                       continue;
+               }
                if (!strcmp(arg, "--unpacked")) {
                        unpacked = 1;
                        limited = 1;
index a5fb93c..610eacb 100644 (file)
@@ -43,6 +43,7 @@ static int is_rev_argument(const char *arg)
                "--min-age=",
                "--no-merges",
                "--objects",
+               "--objects-edge",
                "--parents",
                "--pretty",
                "--show-breaks",
index b58bbab..f558386 100644 (file)
@@ -12,6 +12,7 @@ static const char *exec = "git-receive-pack";
 static int verbose = 0;
 static int send_all = 0;
 static int force_update = 0;
+static int use_thin_pack = 0;
 
 static int is_zero_sha1(const unsigned char *sha1)
 {
@@ -42,7 +43,10 @@ static void exec_rev_list(struct ref *refs)
        int i = 0, j;
 
        args[i++] = "rev-list"; /* 0 */
-       args[i++] = "--objects";        /* 1 */
+       if (use_thin_pack)      /* 1 */
+               args[i++] = "--objects-edge";
+       else
+               args[i++] = "--objects";
 
        /* First send the ones we care about most */
        for (ref = refs; ref; ref = ref->next) {
@@ -379,6 +383,10 @@ int main(int argc, char **argv)
                                verbose = 1;
                                continue;
                        }
+                       if (!strcmp(arg, "--thin")) {
+                               use_thin_pack = 1;
+                               continue;
+                       }
                        usage(send_pack_usage);
                }
                if (!dest) {
index 3606529..635abb3 100644 (file)
@@ -14,6 +14,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n
 #define MAX_HAS 256
 #define MAX_NEEDS 256
 static int nr_has = 0, nr_needs = 0, multi_ack = 0, nr_our_refs = 0;
+static int use_thin_pack = 0;
 static unsigned char has_sha1[MAX_HAS][20];
 static unsigned char needs_sha1[MAX_NEEDS][20];
 static unsigned int timeout = 0;
@@ -49,8 +50,10 @@ static void create_pack_file(void)
                char *buf;
                char **p;
 
-               if (create_full_pack)
+               if (create_full_pack) {
                        args = 10;
+                       use_thin_pack = 0; /* no point doing it */
+               }
                else
                        args = nr_has + nr_needs + 5;
                argv = xmalloc(args * sizeof(char *));
@@ -62,7 +65,7 @@ static void create_pack_file(void)
                close(fd[0]);
                close(fd[1]);
                *p++ = "rev-list";
-               *p++ = "--objects";
+               *p++ = use_thin_pack ? "--objects-edge" : "--objects";
                if (create_full_pack || MAX_NEEDS <= nr_needs)
                        *p++ = "--all";
                else {
@@ -192,6 +195,8 @@ static int receive_needs(void)
                            "expected to get sha, not '%s'", line);
                if (strstr(line+45, "multi_ack"))
                        multi_ack = 1;
+               if (strstr(line+45, "thin-pack"))
+                       use_thin_pack = 1;
 
                /* We have sent all our refs already, and the other end
                 * should have chosen out of them; otherwise they are
@@ -213,7 +218,7 @@ static int receive_needs(void)
 
 static int send_ref(const char *refname, const unsigned char *sha1)
 {
-       static char *capabilities = "multi_ack";
+       static char *capabilities = "multi_ack thin-pack";
        struct object *o = parse_object(sha1);
 
        if (!o)