static struct index ** delta_index(const unsigned char *buf,
unsigned long bufsize,
- unsigned long trg_bufsize,
- unsigned int *hash_shift)
+ unsigned long trg_bufsize)
{
unsigned long hsize;
unsigned int i, hshift, hlimit, *hash_count;
for (i = 8; (1 << i) < hsize && i < 24; i += 2);
hsize = 1 << i;
hshift = (i - 8) / 2;
- *hash_shift = hshift;
- /* allocate lookup index */
- mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
+ /*
+ * Allocate lookup index. Note the first hash pointer
+ * is used to store the hash shift value.
+ */
+ mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry));
if (!mem)
return NULL;
hash = mem;
- entry = mem + hsize * sizeof(*hash);
+ *hash++ = (void *)hshift;
+ entry = mem + (1 + hsize) * sizeof(*hash);
memset(hash, 0, hsize * sizeof(*hash));
/* allocate an array to count hash entries */
/*
* Now make sure none of the hash buckets has more entries than
- * we're willing to test. Otherwise we short-circuit the entry
- * list uniformly to still preserve a good repartition across
- * the reference buffer.
+ * we're willing to test. Otherwise we cull the entry list to
+ * limit identical three byte prefixes to still preserve a good
+ * repartition across the reference buffer.
*/
for (i = 0; i < hsize; i++) {
+ struct index **list, *bucket, *remaining;
+ int cnt;
if (hash_count[i] < hlimit)
continue;
- entry = hash[i];
- do {
- struct index *keep = entry;
- int skip = hash_count[i] / hlimit / 2;
- do {
- entry = entry->next;
- } while(--skip && entry);
- keep->next = entry;
- } while(entry);
+
+ bucket = NULL;
+ list = &bucket;
+ remaining = hash[i];
+ cnt = 0;
+ while (cnt < hlimit && remaining) {
+ struct index *this = remaining, *that;
+ remaining = remaining->next;
+ for (that = bucket; that; that = that->next) {
+ if (!memcmp(that->ptr, this->ptr, 3))
+ break;
+ }
+ if (that)
+ continue; /* discard */
+ cnt++;
+ *list = this;
+ list = &(this->next);
+ this->next = NULL;
+ }
+ hash[i] = bucket;
}
free(hash_count);
- return hash;
+ return hash-1;
}
/* provide the size of the copy opcode given the block offset and size */
void *diff_delta(void *from_buf, unsigned long from_size,
void *to_buf, unsigned long to_size,
unsigned long *delta_size,
- unsigned long max_size)
+ unsigned long max_size,
+ void **from_index)
{
unsigned int i, outpos, outsize, inscnt, hash_shift;
const unsigned char *ref_data, *ref_top, *data, *top;
if (!from_size || !to_size)
return NULL;
- hash = delta_index(from_buf, from_size, to_size, &hash_shift);
- if (!hash)
- return NULL;
+ if (from_index && *from_index) {
+ hash = *from_index;
+ } else {
+ hash = delta_index(from_buf, from_size, to_size);
+ if (!hash)
+ return NULL;
+ if (from_index)
+ *from_index = hash;
+ }
+ hash_shift = (unsigned int)(*hash++);
outpos = 0;
outsize = 8192;
outsize = max_size + MAX_OP_SIZE + 1;
out = malloc(outsize);
if (!out) {
- free(hash);
+ if (!from_index)
+ free(hash-1);
return NULL;
}
out = realloc(out, outsize);
if (!out) {
free(tmp);
- free(hash);
+ if (!from_index)
+ free(hash-1);
return NULL;
}
}
if (inscnt)
out[outpos - inscnt - 1] = inscnt;
- free(hash);
+ if (!from_index)
+ free(hash-1);
*delta_size = outpos;
return out;
}