sparse cleanup
[git.git] / diff.c
diff --git a/diff.c b/diff.c
index 0b704d3..21771e7 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -11,6 +11,8 @@
 
 static const char *diff_opts = "-pu";
 static unsigned char null_sha1[20] = { 0, };
+#define MAX_SCORE 10000
+#define DEFAULT_MINIMUM_SCORE 5000
 
 static const char *external_diff(void)
 {
@@ -55,7 +57,7 @@ static char *sq_expand(const char *src)
        const char *cp;
        char *bp;
 
-       /* count bytes needed to store the quoted string. */ 
+       /* count bytes needed to store the quoted string. */
        for (cnt = 1, cp = src; *cp; cnt++, cp++)
                if (*cp == '\'')
                        cnt += 3;
@@ -85,19 +87,18 @@ struct diff_spec {
        unsigned char blob_sha1[20];
        unsigned short mode;     /* file mode */
        unsigned sha1_valid : 1; /* if true, use blob_sha1 and trust mode;
-                                 * however with a NULL SHA1, read them
-                                 * from the file system.
-                                 * if false, use the name and read mode from
+                                 * if false, use the name and read from
                                  * the filesystem.
                                  */
-       unsigned file_valid : 1; /* if false the file does not even exist */
+       unsigned file_valid : 1; /* if false the file does not exist */
 };
 
 static void builtin_diff(const char *name_a,
                         const char *name_b,
-                        struct diff_tempfile *temp)
+                        struct diff_tempfile *temp,
+                        int rename_score)
 {
-       int i, next_at;
+       int i, next_at, cmd_size;
        const char *diff_cmd = "diff -L'%s%s' -L'%s%s'";
        const char *diff_arg  = "'%s' '%s'||:"; /* "||:" is to return 0 */
        const char *input_name_sq[2];
@@ -114,7 +115,7 @@ static void builtin_diff(const char *name_a,
         * we use 2 spaces around diff-opts, and we need to count
         * terminating NUL, so we subtract 9 here.
         */
-       int cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
+       cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
                        strlen(diff_arg) - 9);
        for (i = 0; i < 2; i++) {
                input_name_sq[i] = sq_expand(temp[i].name);
@@ -151,6 +152,10 @@ static void builtin_diff(const char *name_a,
                        printf("new mode %s\n", temp[1].mode);
                }
                if (strcmp(name_a, name_b)) {
+                       if (0 < rename_score)
+                               printf("rename similarity index %d%%\n",
+                                      (int)(0.5+
+                                            rename_score*100.0/MAX_SCORE));
                        printf("rename old %s\n", name_a);
                        printf("rename new %s\n", name_b);
                }
@@ -186,7 +191,7 @@ static int work_tree_matches(const char *name, const unsigned char *sha1)
         * file.  Practically, this code only helps when we are used
         * by diff-cache --cached, which does read the cache before
         * calling us.
-        */ 
+        */
        if (!active_cache)
                return 0;
 
@@ -304,9 +309,10 @@ static void remove_tempfile_on_signal(int signo)
 
 static int detect_rename;
 static int reverse_diff;
+static int diff_raw_output = -1;
 static const char **pathspec;
 static int speccnt;
-static int diff_rename_minimum_score;
+static int minimum_score;
 
 static int matches_pathspec(const char *name)
 {
@@ -336,22 +342,14 @@ static int matches_pathspec(const char *name)
 static void run_external_diff(const char *name,
                              const char *other,
                              struct diff_spec *one,
-                             struct diff_spec *two)
+                             struct diff_spec *two,
+                             int rename_score)
 {
        struct diff_tempfile *temp = diff_temp;
        pid_t pid;
        int status;
        static int atexit_asked = 0;
 
-       if (reverse_diff) {
-               struct diff_spec *tmp_spec;
-               tmp_spec = one; one = two; two = tmp_spec;
-               if (other) {
-                       const char *tmp;
-                       tmp = name; name = other; other = tmp;
-               }
-       }
-
        if (!matches_pathspec(name) && (!other || !matches_pathspec(other)))
                return;
 
@@ -387,7 +385,7 @@ static void run_external_diff(const char *name,
                                *arg++ = temp[1].mode;
                                if (other)
                                        *arg++ = other;
-                               *arg = 0;
+                               *arg = NULL;
                                execvp(pgm, (char *const*) exec_arg);
                        }
                        else
@@ -397,7 +395,7 @@ static void run_external_diff(const char *name,
                 * otherwise we use the built-in one.
                 */
                if (one && two)
-                       builtin_diff(name, other ? : name, temp);
+                       builtin_diff(name, other ? : name, temp, rename_score);
                else
                        printf("* Unmerged path %s\n", name);
                exit(0);
@@ -448,7 +446,7 @@ static void hold_diff(const char *name,
                die("internal error");
 
        if (!detect_rename) {
-               run_external_diff(name, NULL, one, two);
+               run_external_diff(name, NULL, one, two, -1);
                return;
        }
        elem = xmalloc(sizeof(*elem) + strlen(name));
@@ -506,6 +504,7 @@ static void free_data(struct diff_spec_hold *s)
        else if (s->flags & SHOULD_MUNMAP)
                munmap(s->data, s->size);
        s->flags &= ~(SHOULD_FREE|SHOULD_MUNMAP);
+       s->data = NULL;
 }
 
 static void flush_remaining_diff(struct diff_spec_hold *elem,
@@ -520,10 +519,10 @@ static void flush_remaining_diff(struct diff_spec_hold *elem,
                        continue;
                if (on_created_list)
                        run_external_diff(elem->path, NULL,
-                                         &null_file_spec, &elem->it);
+                                         &null_file_spec, &elem->it, -1);
                else
                        run_external_diff(elem->path, NULL,
-                                         &elem->it, &null_file_spec);
+                                         &elem->it, &null_file_spec, -1);
        }
 }
 
@@ -542,28 +541,31 @@ static int is_exact_match(struct diff_spec_hold *src,
        return 0;
 }
 
-#define MINIMUM_SCORE 5000
-int estimate_similarity(struct diff_spec_hold *src, struct diff_spec_hold *dst)
+static int estimate_similarity(struct diff_spec_hold *src, struct diff_spec_hold *dst)
 {
        /* src points at a deleted file and dst points at a created
         * file.  They may be quite similar, in which case we want to
         * say src is renamed to dst.
         *
         * Compare them and return how similar they are, representing
-        * the score as an integer between 0 and 10000.  10000 is
-        * reserved for the case where they match exactly.
+        * the score as an integer between 0 and MAX_SCORE (10000).
+        * An exact match is considered better than any score
+        * computed here.
         */
        void *delta;
        unsigned long delta_size;
+       int score;
 
        delta_size = ((src->size < dst->size) ?
                      (dst->size - src->size) : (src->size - dst->size));
 
        /* We would not consider rename followed by more than
-        * 20% edits; that is, delta_size must be smaller than
-        * (src->size + dst->size)/2 * 0.2, which means...
+        * minimum_score/MAX_SCORE edits; that is, delta_size must be smaller
+        * than (src->size + dst->size)/2 * minimum_score/MAX_SCORE,
+        * which means...
         */
-       if ((src->size + dst->size) < delta_size * 10)
+
+       if ((src->size+dst->size)*minimum_score < delta_size*MAX_SCORE*2)
                return 0;
 
        delta = diff_delta(src->data, src->size,
@@ -574,14 +576,17 @@ int estimate_similarity(struct diff_spec_hold *src, struct diff_spec_hold *dst)
        /* This "delta" is really xdiff with adler32 and all the
         * overheads but it is a quick and dirty approximation.
         *
-        * Now we will give some score to it.  Let's say 20% edit gets
-        * 5000 points and 0% edit gets 9000 points.  That is, every
-        * 1/20000 edit gets 1 point penalty.  The amount of penalty is:
+        * Now we will give some score to it.  100% edit gets
+        * 0 points and 0% edit gets MAX_SCORE points.  That is, every
+        * 1/MAX_SCORE edit gets 1 point penalty.  The amount of penalty is:
         *
-        * (delta_size * 2 / (src->size + dst->size)) * 20000
+        * (delta_size * 2 / (src->size + dst->size)) * MAX_SCORE
         *
         */
-       return 9000 - (40000 * delta_size / (src->size+dst->size));
+       score = MAX_SCORE-(MAX_SCORE*2*delta_size/(src->size+dst->size));
+       if (score < 0) return 0;
+       if (MAX_SCORE < score) return MAX_SCORE;
+       return score;
 }
 
 struct diff_score {
@@ -597,14 +602,15 @@ static int score_compare(const void *a_, const void *b_)
 }
 
 static void flush_rename_pair(struct diff_spec_hold *src,
-                             struct diff_spec_hold *dst)
+                             struct diff_spec_hold *dst,
+                             int rename_score)
 {
        src->flags |= MATCHED;
        dst->flags |= MATCHED;
        free_data(src);
        free_data(dst);
        run_external_diff(src->path, dst->path,
-                         &src->it, &dst->it);
+                         &src->it, &dst->it, rename_score);
 }
 
 static void free_held_diff(struct diff_spec_hold *list)
@@ -625,12 +631,20 @@ void diff_flush(void)
 
        /* We really want to cull the candidates list early
         * with cheap tests in order to avoid doing deltas.
+        *
+        * With the current callers, we should not have already
+        * matched entries at this point, but it is nonetheless
+        * checked for sanity.
         */
        for (dst = createdfile; dst; dst = dst->next) {
+               if (dst->flags & MATCHED)
+                       continue;
                for (src = deletedfile; src; src = src->next) {
+                       if (src->flags & MATCHED)
+                               continue;
                        if (! is_exact_match(src, dst))
                                continue;
-                       flush_rename_pair(src, dst);
+                       flush_rename_pair(src, dst, MAX_SCORE);
                        break;
                }
        }
@@ -663,24 +677,30 @@ void diff_flush(void)
                }
                c++;
        }
-       qsort(mx, num_create*num_delete, sizeof(*mx), score_compare); 
+       qsort(mx, num_create*num_delete, sizeof(*mx), score_compare);
 
+#if 0
        for (c = 0; c < num_create * num_delete; c++) {
                src = mx[c].src;
                dst = mx[c].dst;
                if ((src->flags & MATCHED) || (dst->flags & MATCHED))
                        continue;
+               fprintf(stderr,
+                       "**score ** %d %s %s\n",
+                       mx[c].score, src->path, dst->path);
        }
+#endif
 
        for (c = 0; c < num_create * num_delete; c++) {
                src = mx[c].src;
                dst = mx[c].dst;
                if ((src->flags & MATCHED) || (dst->flags & MATCHED))
                        continue;
-               if (mx[c].score < diff_rename_minimum_score)
+               if (mx[c].score < minimum_score)
                        break;
-               flush_rename_pair(src, dst);
+               flush_rename_pair(src, dst, mx[c].score);
        }
+       free(mx);
 
  exit_path:
        flush_remaining_diff(createdfile, 1);
@@ -690,7 +710,26 @@ void diff_flush(void)
        createdfile = deletedfile = NULL;
 }
 
+int diff_scoreopt_parse(const char *opt)
+{
+       int diglen, num, scale, i;
+       if (opt[0] != '-' || opt[1] != 'M')
+               return -1; /* that is not -M option */
+       diglen = strspn(opt+2, "0123456789");
+       if (diglen == 0 || strlen(opt+2) != diglen)
+               return 0; /* use default */
+       sscanf(opt+2, "%d", &num);
+       for (i = 0, scale = 1; i < diglen; i++)
+               scale *= 10;
+
+       /* The user gives the fraction num/scale (e.g. -M5 means 5/10);
+        * internally that is represented as MAX_SCORE * num / scale.
+        */
+       return MAX_SCORE * num / scale;
+}
+
 void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_,
+               int diff_raw_output_,
                const char **pathspec_, int speccnt_)
 {
        free_held_diff(createdfile);
@@ -700,8 +739,14 @@ void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_,
        detect_rename = detect_rename_;
        reverse_diff = reverse_diff_;
        pathspec = pathspec_;
+       diff_raw_output = diff_raw_output_;
        speccnt = speccnt_;
-       diff_rename_minimum_score = minimum_score_ ? : MINIMUM_SCORE;
+       minimum_score = minimum_score_ ? : DEFAULT_MINIMUM_SCORE;
+}
+
+static const char *git_object_type(unsigned mode)
+{
+       return S_ISDIR(mode) ? "tree" : "blob";
 }
 
 void diff_addremove(int addremove, unsigned mode,
@@ -711,6 +756,22 @@ void diff_addremove(int addremove, unsigned mode,
        char concatpath[PATH_MAX];
        struct diff_spec spec[2], *one, *two;
 
+       if (reverse_diff)
+               addremove = (addremove == '+' ? '-' : '+');
+
+       if (0 <= diff_raw_output) {
+               if (!path)
+                       path = "";
+               printf("%c%06o %s %s %s%s%c",
+                      addremove,
+                      mode,
+                      git_object_type(mode), sha1_to_hex(sha1),
+                      base, path, diff_raw_output);
+               return;
+       }
+       if (S_ISDIR(mode))
+               return;
+
        memcpy(spec[0].blob_sha1, sha1, 20);
        spec[0].mode = mode;
        spec[0].sha1_valid = !!memcmp(sha1, null_sha1, 20);
@@ -737,6 +798,29 @@ void diff_change(unsigned old_mode, unsigned new_mode,
        char concatpath[PATH_MAX];
        struct diff_spec spec[2];
 
+       if (reverse_diff) {
+               unsigned tmp;
+               const unsigned char *tmp_c;
+               tmp = old_mode; old_mode = new_mode; new_mode = tmp;
+               tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
+       }
+
+       if (0 <= diff_raw_output) {
+               char old_hex[41];
+               strcpy(old_hex, sha1_to_hex(old_sha1));
+
+               if (!path)
+                       path = "";
+               printf("*%06o->%06o %s %s->%s %s%s%c",
+                      old_mode, new_mode,
+                      git_object_type(new_mode),
+                      old_hex, sha1_to_hex(new_sha1),
+                      base, path, diff_raw_output);
+               return;
+       }
+       if (S_ISDIR(new_mode))
+               return;
+
        if (path) {
                strcpy(concatpath, base);
                strcat(concatpath, path);
@@ -753,10 +837,15 @@ void diff_change(unsigned old_mode, unsigned new_mode,
        /* We do not look at changed files as candidate for
         * rename detection ever.
         */
-       run_external_diff(path ? concatpath : base, NULL, &spec[0], &spec[1]);
+       run_external_diff(path ? concatpath : base, NULL,
+                         &spec[0], &spec[1], -1);
 }
 
 void diff_unmerge(const char *path)
 {
-       run_external_diff(path, NULL, NULL, NULL);
+       if (0 <= diff_raw_output) {
+               printf("U %s%c", path, diff_raw_output);
+               return;
+       }
+       run_external_diff(path, NULL, NULL, NULL, -1);
 }