[PATCH] Introducing software archaeologist's tool "pickaxe".
authorJunio C Hamano <junkio@cox.net>
Sat, 21 May 2005 09:40:01 +0000 (02:40 -0700)
committerLinus Torvalds <torvalds@ppc970.osdl.org>
Sat, 21 May 2005 16:58:03 +0000 (09:58 -0700)
This steals the "pickaxe" feature from JIT and makes it available
to the bare Plumbing layer.  From the command line, the user
gives a string he is interested in.

Using the diff-core infrastructure previously introduced, it
filters the differences to limit the output only to the diffs
between <src> and <dst> where the string appears in one but
not in the other.  For example:

 $ ./git-rev-list HEAD | ./git-diff-tree -Sdiff-tree-helper --stdin -M

would show the diffs that touch the string "diff-tree-helper".

In a real software-archaeologist application, you would typically
look for a few to several lines of code and see where that code
came from.

The "pickaxe" module runs after the "rename/copy detection" module,
so it even crosses the file rename boundary, as the above
example demonstrates.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
14 files changed:
Documentation/git-diff-cache.txt
Documentation/git-diff-files.txt
Documentation/git-diff-helper.txt
Documentation/git-diff-tree.txt
Makefile
diff-cache.c
diff-files.c
diff-helper.c
diff-tree.c
diff.c
diff.h
diffcore-pickaxe.c [new file with mode: 0644]
diffcore-rename.c
diffcore.h

index e3ed713..2cfdffd 100644 (file)
@@ -9,7 +9,7 @@ git-diff-cache - Compares content and mode of blobs between the cache and reposi
 
 SYNOPSIS
 --------
-'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [--cached] <tree-ish>
+'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [-S<string>] [--cached] <tree-ish>
 
 DESCRIPTION
 -----------
@@ -39,6 +39,10 @@ OPTIONS
 -C::
        Detect copies as well as renames; implies -p.
 
+-S<string>::
+       Look for differences that contain the change in <string>.
+
+
 -R::
        Output diff in reverse.
 
index 1eae3d0..51a3d0f 100644 (file)
@@ -9,7 +9,7 @@ git-diff-files - Compares files in the working tree and the cache
 
 SYNOPSIS
 --------
-'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [<pattern>...]
+'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S<string>] [<pattern>...]
 
 DESCRIPTION
 -----------
@@ -35,6 +35,10 @@ OPTIONS
 -C::
        Detect copies as well as renames; implies -p.
 
+-S<string>::
+       Look for differences that contain the change in <string>.
+
+
 -r::
        This flag does not mean anything.  It is there only to match
        git-diff-tree.  Unlike git-diff-tree, git-diff-files always looks
index 302789e..2036c6c 100644 (file)
@@ -9,7 +9,7 @@ git-diff-helper - Generates patch format output for git-diff-*
 
 SYNOPSIS
 --------
-'git-diff-helper' [-z] [-R] [-M] [-C]
+'git-diff-helper' [-z] [-R] [-M] [-C] [-S<string>]
 
 DESCRIPTION
 -----------
@@ -37,6 +37,10 @@ OPTIONS
 -C::
        Detect copies as well as renames.
 
+-S<string>::
+       Look for differences that contain the change in <string>.
+
+
 See Also
 --------
 The section on generating patches in link:git-diff-cache.html[git-diff-cache]
index f4e95a9..bdc8d5a 100644 (file)
@@ -9,7 +9,7 @@ git-diff-tree - Compares the content and mode of blobs found via two tree object
 
 SYNOPSIS
 --------
-'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-m] [-s] [-v] <tree-ish> <tree-ish> [<pattern>]\*
+'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-S<string>] [-m] [-s] [-v] <tree-ish> <tree-ish> [<pattern>]\*
 
 DESCRIPTION
 -----------
@@ -43,6 +43,9 @@ OPTIONS
 -R::
        Output diff in reverse.
 
+-S<string>::
+       Look for differences that contain the change in <string>.
+
 -r::
        recurse
 
index f61bfe0..b13edab 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@ LIB_H += strbuf.h
 LIB_OBJS += strbuf.o
 
 LIB_H += diff.h
-LIB_OBJS += diff.o diffcore-rename.o
+LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o
 
 LIB_OBJS += gitenv.o
 
@@ -125,6 +125,7 @@ strbuf.o: $(LIB_H)
 gitenv.o: $(LIB_H)
 diff.o: $(LIB_H)
 diffcore-rename.o : $(LIB_H)
+diffcore-pickaxe.o : $(LIB_H)
 
 test: all
        make -C t/ all
index 383302a..a5deb8c 100644 (file)
@@ -8,6 +8,7 @@ static int line_termination = '\n';
 static int detect_rename = 0;
 static int reverse_diff = 0;
 static int diff_score_opt = 0;
+static char *pickaxe = 0;
 
 /* A file entry went away or appeared */
 static void show_file(const char *prefix, struct cache_entry *ce, unsigned char *sha1, unsigned int mode)
@@ -153,7 +154,7 @@ static void mark_merge_entries(void)
 }
 
 static char *diff_cache_usage =
-"git-diff-cache [-p] [-r] [-z] [-m] [-M] [-C] [-R] [--cached] <tree-ish>";
+"git-diff-cache [-p] [-r] [-z] [-m] [-M] [-C] [-R] [-S<string>] [--cached] <tree-ish>";
 
 int main(int argc, char **argv)
 {
@@ -194,6 +195,10 @@ int main(int argc, char **argv)
                        reverse_diff = 1;
                        continue;
                }
+               if (!strcmp(arg, "-S")) {
+                       pickaxe = arg + 2;
+                       continue;
+               }
                if (!strcmp(arg, "-m")) {
                        match_nonexisting = 1;
                        continue;
@@ -208,8 +213,8 @@ int main(int argc, char **argv)
        if (argc != 2 || get_sha1(argv[1], tree_sha1))
                usage(diff_cache_usage);
 
-       diff_setup(detect_rename, diff_score_opt, reverse_diff,
-                  (generate_patch ? -1 : line_termination),
+       diff_setup(detect_rename, diff_score_opt, pickaxe,
+                  reverse_diff, (generate_patch ? -1 : line_termination),
                   NULL, 0);
 
        mark_merge_entries();
index d020254..d3b80a0 100644 (file)
@@ -7,13 +7,14 @@
 #include "diff.h"
 
 static const char *diff_files_usage =
-"git-diff-files [-p] [-q] [-r] [-z] [-M] [-C] [-R] [paths...]";
+"git-diff-files [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-S<string>] [paths...]";
 
 static int generate_patch = 0;
 static int line_termination = '\n';
 static int detect_rename = 0;
 static int reverse_diff = 0;
 static int diff_score_opt = 0;
+static char *pickaxe = 0;
 static int silent = 0;
 
 static int matches_pathspec(struct cache_entry *ce, char **spec, int cnt)
@@ -67,6 +68,8 @@ int main(int argc, char **argv)
                        line_termination = 0;
                else if (!strcmp(argv[1], "-R"))
                        reverse_diff = 1;
+               else if (!strcmp(argv[1], "-S"))
+                       pickaxe = argv[1] + 2;
                else if (!strncmp(argv[1], "-M", 2)) {
                        diff_score_opt = diff_scoreopt_parse(argv[1]);
                        detect_rename = generate_patch = 1;
@@ -89,8 +92,8 @@ int main(int argc, char **argv)
                exit(1);
        }
 
-       diff_setup(detect_rename, diff_score_opt, reverse_diff,
-                  (generate_patch ? -1 : line_termination),
+       diff_setup(detect_rename, diff_score_opt, pickaxe,
+                  reverse_diff, (generate_patch ? -1 : line_termination),
                   NULL, 0);
 
        for (i = 0; i < entries; i++) {
index 4e966db..568d5ae 100644 (file)
@@ -9,6 +9,7 @@
 static int detect_rename = 0;
 static int diff_score_opt = 0;
 static int generate_patch = 1;
+static char *pickaxe = 0;
 
 static int parse_oneside_change(const char *cp, int *mode,
                                unsigned char *sha1, char *path)
@@ -93,7 +94,7 @@ static int parse_diff_raw_output(const char *buf)
 }
 
 static const char *diff_helper_usage =
-       "git-diff-helper [-z] [-R] [-M] [-C] paths...";
+       "git-diff-helper [-z] [-R] [-M] [-C] [-S<string>] paths...";
 
 int main(int ac, const char **av) {
        struct strbuf sb;
@@ -117,14 +118,17 @@ int main(int ac, const char **av) {
                        detect_rename = 2;
                        diff_score_opt = diff_scoreopt_parse(av[1]);
                }
+               else if (av[1][1] == 'S') {
+                       pickaxe = av[1] + 2;
+               }
                else
                        usage(diff_helper_usage);
                ac--; av++;
        }
        /* the remaining parameters are paths patterns */
 
-       diff_setup(detect_rename, diff_score_opt, reverse,
-                  (generate_patch ? -1 : line_termination),
+       diff_setup(detect_rename, diff_score_opt, pickaxe,
+                  reverse, (generate_patch ? -1 : line_termination),
                   av+1, ac-1);
 
        while (1) {
index aa49446..233a250 100644 (file)
@@ -13,6 +13,7 @@ static int generate_patch = 0;
 static int detect_rename = 0;
 static int reverse_diff = 0;
 static int diff_score_opt = 0;
+static char *pickaxe = 0;
 static const char *header = NULL;
 static const char *header_prefix = "";
 
@@ -271,8 +272,8 @@ static int diff_tree_sha1_top(const unsigned char *old,
 {
        int ret;
 
-       diff_setup(detect_rename, diff_score_opt, reverse_diff,
-                  (generate_patch ? -1 : line_termination),
+       diff_setup(detect_rename, diff_score_opt, pickaxe,
+                  reverse_diff, (generate_patch ? -1 : line_termination),
                   NULL, 0);
        ret = diff_tree_sha1(old, new, base);
        diff_flush();
@@ -285,8 +286,8 @@ static int diff_root_tree(const unsigned char *new, const char *base)
        void *tree;
        unsigned long size;
 
-       diff_setup(detect_rename, diff_score_opt, reverse_diff,
-                  (generate_patch ? -1 : line_termination),
+       diff_setup(detect_rename, diff_score_opt, pickaxe,
+                  reverse_diff, (generate_patch ? -1 : line_termination),
                   NULL, 0);
        tree = read_object_with_reference(new, "tree", &size, NULL);
        if (!tree)
@@ -430,7 +431,7 @@ static int diff_tree_stdin(char *line)
 }
 
 static char *diff_tree_usage =
-"git-diff-tree [-p] [-r] [-z] [--stdin] [-M] [-C] [-R] [-m] [-s] [-v] <tree-ish> <tree-ish>";
+"git-diff-tree [-p] [-r] [-z] [--stdin] [-M] [-C] [-R] [-S<string>] [-m] [-s] [-v] <tree-ish> <tree-ish>";
 
 int main(int argc, char **argv)
 {
@@ -473,6 +474,10 @@ int main(int argc, char **argv)
                        recursive = generate_patch = 1;
                        continue;
                }
+               if (!strncmp(arg, "-S", 2)) {
+                       pickaxe = arg + 2;
+                       continue;
+               }
                if (!strncmp(arg, "-M", 2)) {
                        detect_rename = recursive = generate_patch = 1;
                        diff_score_opt = diff_scoreopt_parse(arg);
diff --git a/diff.c b/diff.c
index d908ef3..13d5e3e 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -17,6 +17,7 @@ static int reverse_diff;
 static int diff_raw_output = -1;
 static const char **pathspec;
 static int speccnt;
+static const char *pickaxe;
 static int minimum_score;
 
 static const char *external_diff(void)
@@ -511,8 +512,9 @@ int diff_scoreopt_parse(const char *opt)
        return MAX_SCORE * num / scale;
 }
 
-void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_,
-               int diff_raw_output_,
+void diff_setup(int detect_rename_, int minimum_score_,
+               char *pickaxe_,
+               int reverse_diff_, int diff_raw_output_,
                const char **pathspec_, int speccnt_)
 {
        detect_rename = detect_rename_;
@@ -521,15 +523,16 @@ void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_,
        diff_raw_output = diff_raw_output_;
        speccnt = speccnt_;
        minimum_score = minimum_score_ ? : DEFAULT_MINIMUM_SCORE;
+       pickaxe = pickaxe_;
 }
 
 static struct diff_queue_struct queued_diff;
 
-struct diff_file_pair *diff_queue(struct diff_queue_struct *queue,
+struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
                                  struct diff_filespec *one,
                                  struct diff_filespec *two)
 {
-       struct diff_file_pair *dp = xmalloc(sizeof(*dp));
+       struct diff_filepair *dp = xmalloc(sizeof(*dp));
        dp->one = one;
        dp->two = two;
        dp->xfrm_msg = 0;
@@ -549,7 +552,7 @@ static const char *git_object_type(unsigned mode)
        return S_ISDIR(mode) ? "tree" : "blob";
 }
 
-static void diff_flush_raw(struct diff_file_pair *p)
+static void diff_flush_raw(struct diff_filepair *p)
 {
        struct diff_filespec *it;
        int addremove;
@@ -583,7 +586,7 @@ static void diff_flush_raw(struct diff_file_pair *p)
               sha1_to_hex(it->sha1), it->path, diff_raw_output);
 }
 
-static void diff_flush_patch(struct diff_file_pair *p)
+static void diff_flush_patch(struct diff_filepair *p)
 {
        const char *name, *other;
 
@@ -600,7 +603,7 @@ static int identical(struct diff_filespec *one, struct diff_filespec *two)
 {
        /* This function is written stricter than necessary to support
         * the currently implemented transformers, but the idea is to
-        * let transformers to produce diff_file_pairs any way they want,
+        * let transformers to produce diff_filepairs any way they want,
         * and filter and clean them up here before producing the output.
         */
 
@@ -623,7 +626,7 @@ static int identical(struct diff_filespec *one, struct diff_filespec *two)
        return 0;
 }
 
-static void diff_flush_one(struct diff_file_pair *p)
+static void diff_flush_one(struct diff_filepair *p)
 {
        if (identical(p->one, p->two))
                return;
@@ -640,11 +643,13 @@ void diff_flush(void)
 
        if (detect_rename)
                diff_detect_rename(q, detect_rename, minimum_score);
+       if (pickaxe)
+               diff_pickaxe(q, pickaxe);
        for (i = 0; i < q->nr; i++)
                diff_flush_one(q->queue[i]);
 
        for (i = 0; i < q->nr; i++) {
-               struct diff_file_pair *p = q->queue[i];
+               struct diff_filepair *p = q->queue[i];
                diff_free_filespec_data(p->one);
                diff_free_filespec_data(p->two);
                free(p->xfrm_msg);
diff --git a/diff.h b/diff.h
index 86a645a..97d39ac 100644 (file)
--- a/diff.h
+++ b/diff.h
@@ -20,6 +20,7 @@ extern void diff_unmerge(const char *path);
 extern int diff_scoreopt_parse(const char *opt);
 
 extern void diff_setup(int detect_rename, int minimum_score,
+                      char *pickaxe,
                       int reverse, int raw_output,
                       const char **spec, int cnt);
 
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
new file mode 100644 (file)
index 0000000..ee22e36
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "delta.h"
+
+static int contains(struct diff_filespec *one,
+                   const char *needle, unsigned long len)
+{
+       unsigned long offset, sz;
+       const char *data;
+       if (diff_populate_filespec(one))
+               return 0;
+       sz = one->size;
+       data = one->data;
+       for (offset = 0; offset + len <= sz; offset++)
+                    if (!strncmp(needle, data + offset, len))
+                            return 1;
+       return 0;
+}
+
+void diff_pickaxe(struct diff_queue_struct *q, const char *needle)
+{
+       unsigned long len = strlen(needle);
+       int i;
+       struct diff_queue_struct outq;
+       outq.queue = NULL;
+       outq.nr = outq.alloc = 0;
+
+       for (i = 0; i < q->nr; i++) {
+               struct diff_filepair *p = q->queue[i];
+               if (!p->one->file_valid) {
+                       if (!p->two->file_valid)
+                               continue; /* ignore nonsense */
+                       /* created */
+                       if (contains(p->two, needle, len))
+                               diff_queue(&outq, p->one, p->two);
+               }
+               else if (!p->two->file_valid) {
+                       if (contains(p->one, needle, len))
+                               diff_queue(&outq, p->one, p->two);
+               }
+               else if (contains(p->one, needle, len) !=
+                        contains(p->two, needle, len))
+                       diff_queue(&outq, p->one, p->two);
+       }
+       for (i = 0; i < q->nr; i++) {
+               struct diff_filepair *p = q->queue[i];
+               free(p);
+       }
+       free(q->queue);
+       *q = outq;
+       return;
+}
index 8aa8f84..6dd753b 100644 (file)
@@ -129,7 +129,7 @@ static void record_rename_pair(struct diff_queue_struct *outq,
         * To achieve this sort order, we give xform_work the number
         * above.
         */
-       struct diff_file_pair *dp = diff_queue(outq, src, dst);
+       struct diff_filepair *dp = diff_queue(outq, src, dst);
        dp->xfrm_work = (rank * 2 + 1) | (score<<RENAME_SCORE_SHIFT);
        dst->xfrm_flags |= RENAME_DST_MATCHED;
 }
@@ -148,7 +148,7 @@ static void debug_filespec(struct diff_filespec *s, int x, const char *one)
                s->size, s->xfrm_flags);
 }
 
-static void debug_filepair(const struct diff_file_pair *p, int i)
+static void debug_filepair(const struct diff_filepair *p, int i)
 {
        debug_filespec(p->one, i, "one");
        debug_filespec(p->two, i, "two");
@@ -165,7 +165,7 @@ static void debug_queue(const char *msg, struct diff_queue_struct *q)
                fprintf(stderr, "%s\n", msg);
        fprintf(stderr, "q->nr = %d\n", q->nr);
        for (i = 0; i < q->nr; i++) {
-               struct diff_file_pair *p = q->queue[i];
+               struct diff_filepair *p = q->queue[i];
                debug_filepair(p, i);
        }
 }
@@ -180,8 +180,8 @@ static void debug_queue(const char *msg, struct diff_queue_struct *q)
  */
 static int rank_compare(const void *a_, const void *b_)
 {
-       const struct diff_file_pair *a = *(const struct diff_file_pair **)a_;
-       const struct diff_file_pair *b = *(const struct diff_file_pair **)b_;
+       const struct diff_filepair *a = *(const struct diff_filepair **)a_;
+       const struct diff_filepair *b = *(const struct diff_filepair **)b_;
        int a_rank = a->xfrm_work & ((1<<RENAME_SCORE_SHIFT) - 1);
        int b_rank = b->xfrm_work & ((1<<RENAME_SCORE_SHIFT) - 1);
 
@@ -207,7 +207,7 @@ static int needs_to_stay(struct diff_queue_struct *q, int i,
         * as the source of rename/copy), we need to copy, not rename.
         */
        while (i < q->nr) {
-               struct diff_file_pair *p = q->queue[i++];
+               struct diff_filepair *p = q->queue[i++];
                if (!p->two->file_valid)
                        continue; /* removed is fine */
                if (strcmp(p->one->path, it->path))
@@ -243,15 +243,8 @@ void diff_detect_rename(struct diff_queue_struct *q,
        srcs[0] = &deleted;
        srcs[1] = &stay;
 
-       /* NEEDSWORK:
-        * (1) make sure we properly ignore but pass trees.
-        *
-        * (2) make sure we do right thing on the same path deleted
-        *     and created in the same patch.
-        */
-
        for (i = 0; i < q->nr; i++) {
-               struct diff_file_pair *p = q->queue[i];
+               struct diff_filepair *p = q->queue[i];
                if (!p->one->file_valid)
                        if (!p->two->file_valid)
                                continue; /* ignore nonsense */
@@ -340,11 +333,11 @@ void diff_detect_rename(struct diff_queue_struct *q,
         * See comments at the top of record_rename_pair for numbers used
         * to assign xfrm_work.
         *
-        * Note that we have not annotated the diff_file_pair with any comment
+        * Note that we have not annotated the diff_filepair with any comment
         * so there is nothing other than p to free.
         */
        for (i = 0; i < q->nr; i++) {
-               struct diff_file_pair *dp, *p = q->queue[i];
+               struct diff_filepair *dp, *p = q->queue[i];
                if (!p->one->file_valid) {
                        if (p->two->file_valid) {
                                /* creation */
@@ -378,7 +371,7 @@ void diff_detect_rename(struct diff_queue_struct *q,
 
        /* Copy it out to q, removing duplicates. */
        for (i = 0; i < outq.nr; i++) {
-               struct diff_file_pair *p = outq.queue[i];
+               struct diff_filepair *p = outq.queue[i];
                if (!p->one->file_valid) {
                        /* created */
                        if (p->two->xfrm_flags & RENAME_DST_MATCHED)
@@ -395,7 +388,7 @@ void diff_detect_rename(struct diff_queue_struct *q,
                }
                else if (strcmp(p->one->path, p->two->path)) {
                        /* rename or copy */
-                       struct diff_file_pair *dp =
+                       struct diff_filepair *dp =
                                diff_queue(q, p->one, p->two);
                        int msglen = (strlen(p->one->path) +
                                      strlen(p->two->path) + 100);
index 5fa7067..c3809ef 100644 (file)
@@ -38,7 +38,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *,
 extern int diff_populate_filespec(struct diff_filespec *);
 extern void diff_free_filespec_data(struct diff_filespec *);
 
-struct diff_file_pair {
+struct diff_filepair {
        struct diff_filespec *one;
        struct diff_filespec *two;
        char *xfrm_msg;
@@ -47,14 +47,15 @@ struct diff_file_pair {
 };
 
 struct diff_queue_struct {
-       struct diff_file_pair **queue;
+       struct diff_filepair **queue;
        int alloc;
        int nr;
 };
 
-extern struct diff_file_pair *diff_queue(struct diff_queue_struct *,
-                                        struct diff_filespec *,
-                                        struct diff_filespec *);
+extern struct diff_filepair *diff_queue(struct diff_queue_struct *,
+                                       struct diff_filespec *,
+                                       struct diff_filespec *);
 extern void diff_detect_rename(struct diff_queue_struct *, int, int);
+extern void diff_pickaxe(struct diff_queue_struct *, const char *);
 
 #endif