binary diff: further updates.
[git.git] / diff.c
diff --git a/diff.c b/diff.c
index b54bbfa..bfe54c3 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -8,6 +8,7 @@
 #include "quote.h"
 #include "diff.h"
 #include "diffcore.h"
+#include "delta.h"
 #include "xdiff-interface.h"
 
 static int use_size_cache;
@@ -195,6 +196,56 @@ static int fn_out(void *priv, mmbuffer_t *mb, int nbuf)
        return 0;
 }
 
+/*
+ * Format the rename "a" -> "b" compactly as "pfx{mid-a => mid-b}sfx" by
+ * factoring out the leading/trailing directories both paths share, or as
+ * "a => b" if they share none.  Returns an xmalloc()ed string.
+ */
+static char *pprint_rename(const char *a, const char *b)
+{
+       char *name;
+       int i, limit, a_midlen, b_midlen;
+       int pfx_length = 0, sfx_length = 0;
+       int len_a = strlen(a), len_b = strlen(b);
+
+       /* Common prefix, cut after the last '/' both paths share. */
+       for (i = 0; a[i] && a[i] == b[i]; i++)
+               if (a[i] == '/')
+                       pfx_length = i + 1;
+
+       /*
+        * Common suffix, starting at the leftmost '/' the scan matches.
+        * Scan back from the NUL, but never past the '/' that ends the
+        * prefix: overlapping the two makes the middle lengths negative,
+        * and a negative "%.*s" precision prints the whole string.
+        */
+       limit = pfx_length ? pfx_length - 1 : 0;
+       for (i = 0; limit <= len_a - i && limit <= len_b - i; i++) {
+               if (a[len_a - i] != b[len_b - i])
+                       break;
+               if (a[len_a - i] == '/')
+                       sfx_length = i;
+       }
+
+       if (!(pfx_length + sfx_length)) {
+               name = xmalloc(len_a + len_b + 5);
+               sprintf(name, "%s => %s", a, b);
+               return name;
+       }
+       /* The suffix may reuse the prefix's final '/': clamp -1 up to 0. */
+       a_midlen = len_a - pfx_length - sfx_length;
+       b_midlen = len_b - pfx_length - sfx_length;
+       if (a_midlen < 0)
+               a_midlen = 0;
+       if (b_midlen < 0)
+               b_midlen = 0;
+       name = xmalloc(pfx_length + a_midlen + b_midlen + sfx_length + 7);
+       sprintf(name, "%.*s{%.*s => %.*s}%s", pfx_length, a,
+               a_midlen, a + pfx_length, b_midlen, b + pfx_length,
+               a + len_a - sfx_length);
+       return name;
+}
+
 struct diffstat_t {
        struct xdiff_emit_state xm;
 
@@ -204,12 +255,14 @@ struct diffstat_t {
                char *name;
                unsigned is_unmerged:1;
                unsigned is_binary:1;
+               unsigned is_renamed:1;
                unsigned int added, deleted;
        } **files;
 };
 
 static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
-               const char *name)
+                                         const char *name_a,
+                                         const char *name_b)
 {
        struct diffstat_file *x;
        x = xcalloc(sizeof (*x), 1);
@@ -219,7 +272,12 @@ static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
                                diffstat->alloc * sizeof(x));
        }
        diffstat->files[diffstat->nr++] = x;
-       x->name = strdup(name);
+       if (name_b) {
+               x->name = pprint_rename(name_a, name_b);
+               x->is_renamed = 1;
+       }
+       else
+               x->name = strdup(name_a);
        return x;
 }
 
@@ -250,13 +308,14 @@ static void show_stats(struct diffstat_t* data)
        for (i = 0; i < data->nr; i++) {
                struct diffstat_file *file = data->files[i];
 
+               len = strlen(file->name);
+               if (max_len < len)
+                       max_len = len;
+
                if (file->is_binary || file->is_unmerged)
                        continue;
                if (max_change < file->added + file->deleted)
                        max_change = file->added + file->deleted;
-               len = strlen(file->name);
-               if (max_len < len)
-                       max_len = len;
        }
 
        for (i = 0; i < data->nr; i++) {
@@ -304,7 +363,8 @@ static void show_stats(struct diffstat_t* data)
                        printf(" %s%-*s |  Unmerged\n", prefix, len, name);
                        goto free_diffstat_file;
                }
-               else if (added + deleted == 0) {
+               else if (!data->files[i]->is_renamed &&
+                        (added + deleted == 0)) {
                        total_files--;
                        goto free_diffstat_file;
                }
@@ -332,6 +392,90 @@ static void show_stats(struct diffstat_t* data)
                        total_files, adds, dels);
 }
 
+/* Deflate "size" bytes at "data" with Z_BEST_COMPRESSION; returns an
+ * xmalloc()ed buffer and stores the compressed length in *result_size.
+ */
+static unsigned char *deflate_it(char *data, unsigned long size,
+                                unsigned long *result_size)
+{
+       unsigned char *deflated;
+       z_stream stream;
+
+       memset(&stream, 0, sizeof(stream));
+       if (deflateInit(&stream, Z_BEST_COMPRESSION) != Z_OK)
+               die("unable to initialize zlib deflate stream");
+       stream.avail_out = deflateBound(&stream, size);
+       deflated = xmalloc(stream.avail_out);
+       stream.next_out = deflated;
+       stream.next_in = (unsigned char *)data;
+       stream.avail_in = size;
+       while (deflate(&stream, Z_FINISH) == Z_OK)
+               ; /* nothing */
+       deflateEnd(&stream);
+       *result_size = stream.total_out;
+       return deflated;
+}
+
+/*
+ * Print a "GIT binary patch" for the change from "one" to "two".  The
+ * payload is the deflated output of diff_delta(one, two) or the deflated
+ * contents of "two", whichever is smaller, written as base85 lines that
+ * each carry at most 52 bytes, followed by an empty line.
+ */
+static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
+{
+       void *cp, *data, *deflated, *delta = NULL;
+       unsigned long orig_size = 0;
+       unsigned long delta_size, deflate_size, data_size;
+
+       printf("GIT binary patch\n");
+       /* Use the deflated delta or the deflated "two", whichever is smaller. */
+       deflated = deflate_it(two->ptr, two->size, &deflate_size);
+       if (one->size && two->size) {
+               delta = diff_delta(one->ptr, one->size,
+                                  two->ptr, two->size,
+                                  &delta_size, deflate_size);
+               if (delta) {
+                       void *to_free = delta;
+                       orig_size = delta_size;
+                       delta = deflate_it(delta, delta_size, &delta_size);
+                       free(to_free);
+               }
+       }
+
+       if (delta && delta_size < deflate_size) {
+               printf("delta %lu\n", orig_size);
+               free(deflated);
+               data = delta;
+               data_size = delta_size;
+       }
+       else {
+               printf("literal %lu\n", two->size);
+               free(delta);
+               data = deflated;
+               data_size = deflate_size;
+       }
+
+       /* emit data encoded in base85 */
+       cp = data;
+       while (data_size) {
+               int bytes = (52 < data_size) ? 52 : data_size;
+               char line[70];
+               data_size -= bytes;
+               /* length prefix: 'A'..'Z' = 1..26 bytes, 'a'..'z' = 27..52 */
+               if (bytes <= 26)
+                       line[0] = bytes + 'A' - 1;
+               else
+                       line[0] = bytes - 26 + 'a' - 1;
+               encode_85(line + 1, cp, bytes);
+               /* arithmetic on void * is a GNU extension; step as char * */
+               cp = (char *)cp + bytes;
+               puts(line);
+       }
+       printf("\n");
+       free(data);
+}
+
 #define FIRST_FEW_BYTES 8000
 static int mmfile_is_binary(mmfile_t *mf)
 {
@@ -348,6 +492,7 @@ static void builtin_diff(const char *name_a,
                         struct diff_filespec *one,
                         struct diff_filespec *two,
                         const char *xfrm_msg,
+                        struct diff_options *o,
                         int complete_rewrite)
 {
        mmfile_t mf1, mf2;
@@ -392,8 +537,17 @@ static void builtin_diff(const char *name_a,
        if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
                die("unable to read files to diff");
 
-       if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2))
-               printf("Binary files %s and %s differ\n", lbl[0], lbl[1]);
+       if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) {
+               /* Quite common confusing case */
+               if (mf1.size == mf2.size &&
+                   !memcmp(mf1.ptr, mf2.ptr, mf1.size))
+                       goto free_ab_and_return;
+               if (o->binary)
+                       emit_binary_diff(&mf1, &mf2);
+               else
+                       printf("Binary files %s and %s differ\n",
+                              lbl[0], lbl[1]);
+       }
        else {
                /* Crazy xdl interfaces.. */
                const char *diffopts = getenv("GIT_DIFF_OPTS");
@@ -424,19 +578,27 @@ static void builtin_diff(const char *name_a,
 }
 
 static void builtin_diffstat(const char *name_a, const char *name_b,
-               struct diff_filespec *one, struct diff_filespec *two,
-               struct diffstat_t *diffstat)
+                            struct diff_filespec *one,
+                            struct diff_filespec *two,
+                            struct diffstat_t *diffstat,
+                            int complete_rewrite)
 {
        mmfile_t mf1, mf2;
        struct diffstat_file *data;
 
-       data = diffstat_add(diffstat, name_a ? name_a : name_b);
+       data = diffstat_add(diffstat, name_a, name_b);
 
        if (!one || !two) {
                data->is_unmerged = 1;
                return;
        }
-
+       if (complete_rewrite) {
+               diff_populate_filespec(one, 0);
+               diff_populate_filespec(two, 0);
+               data->deleted = count_lines(one->data, one->size);
+               data->added = count_lines(two->data, two->size);
+               return;
+       }
        if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
                die("unable to read files to diff");
 
@@ -861,6 +1023,7 @@ static void run_diff_cmd(const char *pgm,
                         struct diff_filespec *one,
                         struct diff_filespec *two,
                         const char *xfrm_msg,
+                        struct diff_options *o,
                         int complete_rewrite)
 {
        if (pgm) {
@@ -870,7 +1033,7 @@ static void run_diff_cmd(const char *pgm,
        }
        if (one && two)
                builtin_diff(name, other ? other : name,
-                            one, two, xfrm_msg, complete_rewrite);
+                            one, two, xfrm_msg, o, complete_rewrite);
        else
                printf("* Unmerged path %s\n", name);
 }
@@ -904,7 +1067,7 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
 
        if (DIFF_PAIR_UNMERGED(p)) {
                /* unmerged */
-               run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0);
+               run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, o, 0);
                return;
        }
 
@@ -951,14 +1114,12 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
        }
 
        if (memcmp(one->sha1, two->sha1, 20)) {
-               char one_sha1[41];
                int abbrev = o->full_index ? 40 : DEFAULT_ABBREV;
-               memcpy(one_sha1, sha1_to_hex(one->sha1), 41);
 
                len += snprintf(msg + len, sizeof(msg) - len,
                                "index %.*s..%.*s",
-                               abbrev, one_sha1, abbrev,
-                               sha1_to_hex(two->sha1));
+                               abbrev, sha1_to_hex(one->sha1),
+                               abbrev, sha1_to_hex(two->sha1));
                if (one->mode == two->mode)
                        len += snprintf(msg + len, sizeof(msg) - len,
                                        " %06o", one->mode);
@@ -976,14 +1137,14 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
                 * needs to be split into deletion and creation.
                 */
                struct diff_filespec *null = alloc_filespec(two->path);
-               run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0);
+               run_diff_cmd(NULL, name, other, one, null, xfrm_msg, o, 0);
                free(null);
                null = alloc_filespec(one->path);
-               run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0);
+               run_diff_cmd(NULL, name, other, null, two, xfrm_msg, o, 0);
                free(null);
        }
        else
-               run_diff_cmd(pgm, name, other, one, two, xfrm_msg,
+               run_diff_cmd(pgm, name, other, one, two, xfrm_msg, o,
                             complete_rewrite);
 
        free(name_munged);
@@ -991,14 +1152,15 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
 }
 
 static void run_diffstat(struct diff_filepair *p, struct diff_options *o,
-               struct diffstat_t *diffstat)
+                        struct diffstat_t *diffstat)
 {
        const char *name;
        const char *other;
+       int complete_rewrite = 0;
 
        if (DIFF_PAIR_UNMERGED(p)) {
                /* unmerged */
-               builtin_diffstat(p->one->path, NULL, NULL, NULL, diffstat);
+               builtin_diffstat(p->one->path, NULL, NULL, NULL, diffstat, 0);
                return;
        }
 
@@ -1008,7 +1170,9 @@ static void run_diffstat(struct diff_filepair *p, struct diff_options *o,
        diff_fill_sha1_info(p->one);
        diff_fill_sha1_info(p->two);
 
-       builtin_diffstat(name, other, p->one, p->two, diffstat);
+       if (p->status == DIFF_STATUS_MODIFIED && p->score)
+               complete_rewrite = 1;
+       builtin_diffstat(name, other, p->one, p->two, diffstat, complete_rewrite);
 }
 
 void diff_setup(struct diff_options *options)
@@ -1029,6 +1193,15 @@ int diff_setup_done(struct diff_options *options)
             options->detect_rename != DIFF_DETECT_COPY) ||
            (0 <= options->rename_limit && !options->detect_rename))
                return -1;
+
+       /*
+        * These cases always need recursive; we do not drop caller-supplied
+        * recursive bits for other formats here.
+        */
+       if ((options->output_format == DIFF_FORMAT_PATCH) ||
+           (options->output_format == DIFF_FORMAT_DIFFSTAT))
+               options->recursive = 1;
+
        if (options->detect_rename && options->rename_limit < 0)
                options->rename_limit = diff_rename_limit_default;
        if (options->setup & DIFF_SETUP_USE_CACHE) {
@@ -1070,6 +1243,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
                options->rename_limit = strtoul(arg+2, NULL, 10);
        else if (!strcmp(arg, "--full-index"))
                options->full_index = 1;
+       else if (!strcmp(arg, "--binary")) {
+               options->output_format = DIFF_FORMAT_PATCH;
+               options->full_index = options->binary = 1;
+       }
        else if (!strcmp(arg, "--name-only"))
                options->output_format = DIFF_FORMAT_NAME;
        else if (!strcmp(arg, "--name-status"))
@@ -1364,7 +1541,7 @@ static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o)
 }
 
 static void diff_flush_stat(struct diff_filepair *p, struct diff_options *o,
-               struct diffstat_t *diffstat)
+                           struct diffstat_t *diffstat)
 {
        if (diff_unmodified_pair(p))
                return;
@@ -1549,7 +1726,7 @@ void diff_flush(struct diff_options *options)
                for (i = 0; i < q->nr; i++) {
                        struct diff_filepair *p = q->queue[i];
                        flush_one_pair(p, DIFF_FORMAT_DIFFSTAT, options,
-                                       diffstat);
+                                      diffstat);
                }
                show_stats(diffstat);
                free(diffstat);