+/*
+ * Idea here is very simple.
+ *
+ * We have total of (sz-N+1) N-byte overlapping sequences in buf whose
+ * size is sz. If the same N-byte sequence appears in both source and
+ * destination, we say the byte that starts that sequence is shared
+ * between them (i.e. copied from source to destination).
+ *
+ * For each possible N-byte sequence, if the source buffer has more
+ * instances of it than the destination buffer, that means the
+ * difference are the number of bytes not copied from source to
+ * destination. If the counts are the same, everything was copied
+ * from source to destination. If the destination has more,
+ * everything was copied, and destination added more.
+ *
+ * We are doing an approximation so we do not really have to waste
+ * memory by actually storing the sequence. We just hash them into
+ * somewhere around 2^16 hashbuckets and count the occurrences.
+ *
+ * The length of the sequence is arbitrarily set to 8 for now.
+ */
+
+/* Wild guess at the initial hash size */
+#define INITIAL_HASH_SIZE 9
+
+/* We leave more room in smaller hash but do not let it
+ * grow to have unused hole too much.
+ */
+#define INITIAL_FREE(sz_log2) ((1<<(sz_log2))*(sz_log2-3)/(sz_log2))
+
+/* A prime rather carefully chosen between 2^16..2^17, so that
+ * HASHBASE < INITIAL_FREE(17). We want to keep the maximum hashtable
+ * size under the current 2<<17 maximum, which can hold this many
+ * different values before overflowing to hashtable of size 2<<18.
+ */
+#define HASHBASE 107927
+
+struct spanhash {
+ unsigned int hashval;
+ unsigned int cnt;
+};
+struct spanhash_top {
+ int alloc_log2;
+ int free;
+ struct spanhash data[FLEX_ARRAY];