Merge branch 'jc/delta' into next
authorJunio C Hamano <junkio@cox.net>
Fri, 3 Mar 2006 06:13:11 +0000 (22:13 -0800)
committerJunio C Hamano <junkio@cox.net>
Fri, 3 Mar 2006 06:13:11 +0000 (22:13 -0800)
* jc/delta:
  diff-delta: cull collided hash bucket more aggressively.

1  2 
diff-delta.c

diff --combined diff-delta.c
@@@ -30,7 -30,8 +30,7 @@@ struct index 
  
  static struct index ** delta_index(const unsigned char *buf,
                                   unsigned long bufsize,
 -                                 unsigned long trg_bufsize,
 -                                 unsigned int *hash_shift)
 +                                 unsigned long trg_bufsize)
  {
        unsigned long hsize;
        unsigned int i, hshift, hlimit, *hash_count;
        for (i = 8; (1 << i) < hsize && i < 24; i += 2);
        hsize = 1 << i;
        hshift = (i - 8) / 2;
 -      *hash_shift = hshift;
  
 -      /* allocate lookup index */
 -      mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
 +      /*
 +       * Allocate lookup index.  Note the first hash pointer
 +       * is used to store the hash shift value.
 +       */
 +      mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry));
        if (!mem)
                return NULL;
        hash = mem;
 -      entry = mem + hsize * sizeof(*hash);
 +      *hash++ = (void *)hshift;
 +      entry = mem + (1 + hsize) * sizeof(*hash);
        memset(hash, 0, hsize * sizeof(*hash));
  
        /* allocate an array to count hash entries */
  
        /*
         * Now make sure none of the hash buckets has more entries than
-        * we're willing to test.  Otherwise we short-circuit the entry
-        * list uniformly to still preserve a good repartition across
-        * the reference buffer.
+        * we're willing to test.  Otherwise we cull the entry list to
+        * limit identical three byte prefixes to still preserve a good
+        * repartition across the reference buffer.
         */
        for (i = 0; i < hsize; i++) {
+               struct index **list, *bucket, *remaining;
+               int cnt;
                if (hash_count[i] < hlimit)
                        continue;
-               entry = hash[i];
-               do {
-                       struct index *keep = entry;
-                       int skip = hash_count[i] / hlimit / 2;
-                       do {
-                               entry = entry->next;
-                       } while(--skip && entry);
-                       keep->next = entry;
-               } while(entry);
+               bucket = NULL;
+               list = &bucket;
+               remaining = hash[i];
+               cnt = 0;
+               while (cnt < hlimit && remaining) {
+                       struct index *this = remaining, *that;
+                       remaining = remaining->next;
+                       for (that = bucket; that; that = that->next) {
+                               if (!memcmp(that->ptr, this->ptr, 3))
+                                       break;
+                       }
+                       if (that)
+                               continue; /* discard */
+                       cnt++;
+                       *list = this;
+                       list = &(this->next);
+                       this->next = NULL;
+               }
+               hash[i] = bucket;
        }
        free(hash_count);
  
 -      return hash;
 +      return hash-1;
  }
  
  /* provide the size of the copy opcode given the block offset and size */
  void *diff_delta(void *from_buf, unsigned long from_size,
                 void *to_buf, unsigned long to_size,
                 unsigned long *delta_size,
 -               unsigned long max_size)
 +               unsigned long max_size,
 +               void **from_index)
  {
        unsigned int i, outpos, outsize, inscnt, hash_shift;
        const unsigned char *ref_data, *ref_top, *data, *top;
  
        if (!from_size || !to_size)
                return NULL;
 -      hash = delta_index(from_buf, from_size, to_size, &hash_shift);
 -      if (!hash)
 -              return NULL;
 +      if (from_index && *from_index) {
 +              hash = *from_index;
 +      } else {
 +              hash = delta_index(from_buf, from_size, to_size);
 +              if (!hash)
 +                      return NULL;
 +              if (from_index)
 +                      *from_index = hash;
 +      }
 +      hash_shift = (unsigned int)(*hash++);
  
        outpos = 0;
        outsize = 8192;
                outsize = max_size + MAX_OP_SIZE + 1;
        out = malloc(outsize);
        if (!out) {
 -              free(hash);
 +              if (!from_index)
 +                      free(hash-1);
                return NULL;
        }
  
                                out = realloc(out, outsize);
                        if (!out) {
                                free(tmp);
 -                              free(hash);
 +                              if (!from_index)
 +                                      free(hash-1);
                                return NULL;
                        }
                }
        if (inscnt)
                out[outpos - inscnt - 1] = inscnt;
  
 -      free(hash);
 +      if (!from_index)
 +              free(hash-1);
        *delta_size = outpos;
        return out;
  }