git-tar-tree: no more void pointer arithmetic
[git.git] / diffcore-delta.c
index 471b98f..7338a40 100644 (file)
  */
 
 /* Wild guess at the initial hash size */
-#define INITIAL_HASH_SIZE 10
-#define HASHBASE 65537 /* next_prime(2^16) */
+#define INITIAL_HASH_SIZE 9
+
+/* Leave proportionally more free room in a small hash table, but
+ * do not let a large one grow with too many unused slots.
+ */
+#define INITIAL_FREE(sz_log2) ((1<<(sz_log2))*(sz_log2-3)/(sz_log2))
+
+/* A prime carefully chosen between 2^16 and 2^17 so that
+ * HASHBASE < INITIAL_FREE(17).  This caps the hashtable at its
+ * current maximum size of 1<<17, which can hold that many distinct
+ * values before it would overflow to a hashtable of size 1<<18.
+ */
+#define HASHBASE 107927
 
 struct spanhash {
-       unsigned long hashval;
-       unsigned long cnt;
+       unsigned int hashval;
+       unsigned int cnt;
 };
 struct spanhash_top {
        int alloc_log2;
@@ -38,7 +49,8 @@ struct spanhash_top {
        struct spanhash data[FLEX_ARRAY];
 };
 
-static struct spanhash *spanhash_find(struct spanhash_top *top, unsigned long hashval)
+static struct spanhash *spanhash_find(struct spanhash_top *top,
+                                     unsigned int hashval)
 {
        int sz = 1 << top->alloc_log2;
        int bucket = hashval & (sz - 1);
@@ -62,7 +74,7 @@ static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig)
 
        new = xmalloc(sizeof(*orig) + sizeof(struct spanhash) * sz);
        new->alloc_log2 = orig->alloc_log2 + 1;
-       new->free = osz;
+       new->free = INITIAL_FREE(new->alloc_log2);
        memset(new->data, 0, sizeof(struct spanhash) * sz);
        for (i = 0; i < osz; i++) {
                struct spanhash *o = &(orig->data[i]);
@@ -87,7 +99,7 @@ static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig)
 }
 
 static struct spanhash_top *add_spanhash(struct spanhash_top *top,
-                                        unsigned long hashval)
+                                        unsigned int hashval, int cnt)
 {
        int bucket, lim;
        struct spanhash *h;
@@ -98,14 +110,14 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
                h = &(top->data[bucket++]);
                if (!h->cnt) {
                        h->hashval = hashval;
-                       h->cnt = 1;
+                       h->cnt = cnt;
                        top->free--;
                        if (top->free < 0)
                                return spanhash_rehash(top);
                        return top;
                }
                if (h->hashval == hashval) {
-                       h->cnt++;
+                       h->cnt += cnt;
                        return top;
                }
                if (lim <= bucket)
@@ -113,31 +125,33 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
        }
 }
 
-static struct spanhash_top *hash_chars(unsigned char *buf, unsigned long sz)
+static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz) /* build a table of span hashes over the sz bytes at buf; returns the (possibly rehashed) table */
 {
-       int i;
-       unsigned long accum1, accum2, hashval;
+       int i, n; /* n counts the bytes consumed so far in the current span */
+       unsigned int accum1, accum2, hashval;
        struct spanhash_top *hash;
 
        i = INITIAL_HASH_SIZE;
        hash = xmalloc(sizeof(*hash) + sizeof(struct spanhash) * (1<<i));
        hash->alloc_log2 = i;
-       hash->free = (1<<i)/2;
+       hash->free = INITIAL_FREE(i); /* number of slots that may be filled before add_spanhash() rehashes */
        memset(hash->data, 0, sizeof(struct spanhash) * (1<<i));
 
-       /* an 8-byte shift register made of accum1 and accum2.  New
-        * bytes come at LSB of accum2, and shifted up to accum1
-        */
-       for (i = accum1 = accum2 = 0; i < 7; i++, sz--) {
-               accum1 = (accum1 << 8) | (accum2 >> 24);
-               accum2 = (accum2 << 8) | *buf++;
-       }
+       n = 0;
+       accum1 = accum2 = 0;
        while (sz) {
-               accum1 = (accum1 << 8) | (accum2 >> 24);
-               accum2 = (accum2 << 8) | *buf++;
-               hashval = (accum1 + accum2 * 0x61) % HASHBASE;
-               hash = add_spanhash(hash, hashval);
+               unsigned int c = *buf++;
+               unsigned int old_1 = accum1; /* saved because accum1 is overwritten before accum2 needs its old top bits */
                sz--;
+               accum1 = (accum1 << 7) ^ (accum2 >> 25); /* shift left by 7 and mix in the other accumulator's high bits */
+               accum2 = (accum2 << 7) ^ (old_1 >> 25); /* NOTE(review): the >> 25 pairing presumably assumes a 32-bit unsigned int - confirm */
+               accum1 += c;
+               if (++n < 64 && c != '\n') /* a span ends at a newline or once it reaches 64 bytes */
+                       continue;
+               hashval = (accum1 + accum2 * 0x61) % HASHBASE;
+               hash = add_spanhash(hash, hashval, n); /* the span's byte count n is what gets added to the entry's cnt */
+               n = 0;
+               accum1 = accum2 = 0; /* NOTE(review): if the input ends mid-span (fewer than 64 bytes, no newline) the loop exits without adding that last span */
        }
        return hash;
 }
@@ -154,9 +168,6 @@ int diffcore_count_changes(void *src, unsigned long src_size,
        struct spanhash_top *src_count, *dst_count;
        unsigned long sc, la;
 
-       if (src_size < 8 || dst_size < 8)
-               return -1;
-
        src_count = dst_count = NULL;
        if (src_count_p)
                src_count = *src_count_p;