Merge branch 'master' into next
[git.git] / diff-delta.c
index 0730b24..6762887 100644 (file)
@@ -30,8 +30,7 @@ struct index {
 
 static struct index ** delta_index(const unsigned char *buf,
                                   unsigned long bufsize,
-                                  unsigned long trg_bufsize,
-                                  unsigned int *hash_shift)
+                                  unsigned long trg_bufsize)
 {
        unsigned long hsize;
        unsigned int i, hshift, hlimit, *hash_count;
@@ -44,14 +43,17 @@ static struct index ** delta_index(const unsigned char *buf,
        for (i = 8; (1 << i) < hsize && i < 24; i += 2);
        hsize = 1 << i;
        hshift = (i - 8) / 2;
-       *hash_shift = hshift;
 
-       /* allocate lookup index */
-       mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
+       /*
+        * Allocate lookup index.  Note the first hash pointer
+        * is used to store the hash shift value.
+        */
+       mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry));
        if (!mem)
                return NULL;
        hash = mem;
-       entry = mem + hsize * sizeof(*hash);
+       *hash++ = (void *)hshift;
+       entry = mem + (1 + hsize) * sizeof(*hash);
        memset(hash, 0, hsize * sizeof(*hash));
 
        /* allocate an array to count hash entries */
@@ -88,26 +90,39 @@ static struct index ** delta_index(const unsigned char *buf,
 
        /*
         * Now make sure none of the hash buckets has more entries than
-        * we're willing to test.  Otherwise we short-circuit the entry
-        * list uniformly to still preserve a good repartition across
-        * the reference buffer.
+        * we're willing to test.  Otherwise we cull the entry list to
+        * limit identical three byte prefixes to still preserve a good
+        * repartition across the reference buffer.
         */
        for (i = 0; i < hsize; i++) {
+               struct index **list, *bucket, *remaining;
+               int cnt;
                if (hash_count[i] < hlimit)
                        continue;
-               entry = hash[i];
-               do {
-                       struct index *keep = entry;
-                       int skip = hash_count[i] / hlimit / 2;
-                       do {
-                               entry = entry->next;
-                       } while(--skip && entry);
-                       keep->next = entry;
-               } while(entry);
+
+               bucket = NULL;
+               list = &bucket;
+               remaining = hash[i];
+               cnt = 0;
+               while (cnt < hlimit && remaining) {
+                       struct index *this = remaining, *that;
+                       remaining = remaining->next;
+                       for (that = bucket; that; that = that->next) {
+                               if (!memcmp(that->ptr, this->ptr, 3))
+                                       break;
+                       }
+                       if (that)
+                               continue; /* discard */
+                       cnt++;
+                       *list = this;
+                       list = &(this->next);
+                       this->next = NULL;
+               }
+               hash[i] = bucket;
        }
        free(hash_count);
 
-       return hash;
+       return hash-1;
 }
 
 /* provide the size of the copy opcode given the block offset and size */
@@ -121,7 +136,8 @@ static struct index ** delta_index(const unsigned char *buf,
 void *diff_delta(void *from_buf, unsigned long from_size,
                 void *to_buf, unsigned long to_size,
                 unsigned long *delta_size,
-                unsigned long max_size)
+                unsigned long max_size,
+                void **from_index)
 {
        unsigned int i, outpos, outsize, inscnt, hash_shift;
        const unsigned char *ref_data, *ref_top, *data, *top;
@@ -130,9 +146,16 @@ void *diff_delta(void *from_buf, unsigned long from_size,
 
        if (!from_size || !to_size)
                return NULL;
-       hash = delta_index(from_buf, from_size, to_size, &hash_shift);
-       if (!hash)
-               return NULL;
+       if (from_index && *from_index) {
+               hash = *from_index;
+       } else {
+               hash = delta_index(from_buf, from_size, to_size);
+               if (!hash)
+                       return NULL;
+               if (from_index)
+                       *from_index = hash;
+       }
+       hash_shift = (unsigned int)(*hash++);
 
        outpos = 0;
        outsize = 8192;
@@ -140,7 +163,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
                outsize = max_size + MAX_OP_SIZE + 1;
        out = malloc(outsize);
        if (!out) {
-               free(hash);
+               if (!from_index)
+                       free(hash-1);
                return NULL;
        }
 
@@ -241,7 +265,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
                                out = realloc(out, outsize);
                        if (!out) {
                                free(tmp);
-                               free(hash);
+                               if (!from_index)
+                                       free(hash-1);
                                return NULL;
                        }
                }
@@ -250,7 +275,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
        if (inscnt)
                out[outpos - inscnt - 1] = inscnt;
 
-       free(hash);
+       if (!from_index)
+               free(hash-1);
        *delta_size = outpos;
        return out;
 }