diff-delta: produce optimal pack data
[git.git] / diff-delta.c
1 /*
2  * diff-delta.c: generate a delta between two buffers
3  *
4  *  Many parts of this file have been lifted from LibXDiff version 0.10.
5  *  http://www.xmailserver.org/xdiff-lib.html
6  *
7  *  LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
8  *  Copyright (C) 2003  Davide Libenzi
9  *
10  *  Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
11  *
12  *  This file is free software; you can redistribute it and/or
13  *  modify it under the terms of the GNU Lesser General Public
14  *  License as published by the Free Software Foundation; either
15  *  version 2.1 of the License, or (at your option) any later version.
16  *
17  *  Use of this within git automatically means that the LGPL
18  *  licensing gets turned into GPLv2 within this project.
19  */
20
21 #include <stdlib.h>
22 #include <string.h>
23 #include "delta.h"
24
25
26 struct index {
27         const unsigned char *ptr;
28         struct index *next;
29 };
30
31 static struct index ** delta_index(const unsigned char *buf,
32                                    unsigned long bufsize,
33                                    unsigned int *hash_shift)
34 {
35         unsigned long hsize;
36         unsigned int hshift, i;
37         const unsigned char *data;
38         struct index *entry, **hash;
39         void *mem;
40
41         /* determine index hash size */
42         hsize = bufsize / 4;
43         for (i = 8; (1 << i) < hsize && i < 16; i++);
44         hsize = 1 << i;
45         hshift = i - 8;
46         *hash_shift = hshift;
47
48         /* allocate lookup index */
49         mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
50         if (!mem)
51                 return NULL;
52         hash = mem;
53         entry = mem + hsize * sizeof(*hash);
54         memset(hash, 0, hsize * sizeof(*hash));
55
56         /* then populate it */
57         data = buf + bufsize - 2;
58         while (data > buf) {
59                 entry->ptr = --data;
60                 i = data[0] ^ data[1] ^ (data[2] << hshift);
61                 entry->next = hash[i];
62                 hash[i] = entry++;
63         }
64
65         return hash;
66 }
67
68 /* provide the size of the copy opcode given the block offset and size */
69 #define COPYOP_SIZE(o, s) \
70     (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
71      !!(s & 0xff) + !!(s & 0xff00) + 1)
72
73 /* the maximum size for any opcode */
74 #define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff)
75
76 void *diff_delta(void *from_buf, unsigned long from_size,
77                  void *to_buf, unsigned long to_size,
78                  unsigned long *delta_size,
79                  unsigned long max_size)
80 {
81         unsigned int i, outpos, outsize, inscnt, hash_shift;
82         const unsigned char *ref_data, *ref_top, *data, *top;
83         unsigned char *out;
84         struct index *entry, **hash;
85
86         if (!from_size || !to_size)
87                 return NULL;
88         hash = delta_index(from_buf, from_size, &hash_shift);
89         if (!hash)
90                 return NULL;
91
92         outpos = 0;
93         outsize = 8192;
94         if (max_size && outsize >= max_size)
95                 outsize = max_size + MAX_OP_SIZE + 1;
96         out = malloc(outsize);
97         if (!out) {
98                 free(hash);
99                 return NULL;
100         }
101
102         ref_data = from_buf;
103         ref_top = from_buf + from_size;
104         data = to_buf;
105         top = to_buf + to_size;
106
107         /* store reference buffer size */
108         out[outpos++] = from_size;
109         from_size >>= 7;
110         while (from_size) {
111                 out[outpos - 1] |= 0x80;
112                 out[outpos++] = from_size;
113                 from_size >>= 7;
114         }
115
116         /* store target buffer size */
117         out[outpos++] = to_size;
118         to_size >>= 7;
119         while (to_size) {
120                 out[outpos - 1] |= 0x80;
121                 out[outpos++] = to_size;
122                 to_size >>= 7;
123         }
124
125         inscnt = 0;
126
127         while (data < top) {
128                 unsigned int moff = 0, msize = 0;
129                 if (data + 2 < top) {
130                         i = data[0] ^ data[1] ^ (data[2] << hash_shift);
131                         for (entry = hash[i]; entry; entry = entry->next) {
132                                 const unsigned char *ref = entry->ptr;
133                                 const unsigned char *src = data;
134                                 unsigned int ref_size = ref_top - ref;
135                                 if (ref_size > top - src)
136                                         ref_size = top - src;
137                                 if (ref_size > 0x10000)
138                                         ref_size = 0x10000;
139                                 if (ref_size <= msize)
140                                         break;
141                                 while (ref_size && *src++ == *ref) {
142                                         ref++;
143                                         ref_size--;
144                                 }
145                                 ref_size = ref - entry->ptr;
146                                 if (msize < ref - entry->ptr) {
147                                         /* this is our best match so far */
148                                         msize = ref - entry->ptr;
149                                         moff = entry->ptr - ref_data;
150                                 }
151                         }
152                 }
153
154                 if (!msize || msize < COPYOP_SIZE(moff, msize)) {
155                         if (!inscnt)
156                                 outpos++;
157                         out[outpos++] = *data++;
158                         inscnt++;
159                         if (inscnt == 0x7f) {
160                                 out[outpos - inscnt - 1] = inscnt;
161                                 inscnt = 0;
162                         }
163                 } else {
164                         unsigned char *op;
165
166                         if (inscnt) {
167                                 out[outpos - inscnt - 1] = inscnt;
168                                 inscnt = 0;
169                         }
170
171                         data += msize;
172                         op = out + outpos++;
173                         i = 0x80;
174
175                         if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
176                         moff >>= 8;
177                         if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
178                         moff >>= 8;
179                         if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
180                         moff >>= 8;
181                         if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
182
183                         if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
184                         msize >>= 8;
185                         if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
186
187                         *op = i;
188                 }
189
190                 if (outpos >= outsize - MAX_OP_SIZE) {
191                         void *tmp = out;
192                         outsize = outsize * 3 / 2;
193                         if (max_size && outsize >= max_size)
194                                 outsize = max_size + MAX_OP_SIZE + 1;
195                         if (max_size && outpos > max_size)
196                                 out = NULL;
197                         else
198                                 out = realloc(out, outsize);
199                         if (!out) {
200                                 free(tmp);
201                                 free(hash);
202                                 return NULL;
203                         }
204                 }
205         }
206
207         if (inscnt)
208                 out[outpos - inscnt - 1] = inscnt;
209
210         free(hash);
211         *delta_size = outpos;
212         return out;
213 }