Teach git-rev-list to follow just a specified set of files
authorLinus Torvalds <torvalds@osdl.org>
Fri, 21 Oct 2005 04:25:09 +0000 (21:25 -0700)
committerJunio C Hamano <junkio@cox.net>
Sun, 23 Oct 2005 05:49:52 +0000 (22:49 -0700)
This is the first cut at a git-rev-list that knows to ignore commits that
don't change a certain file (or set of files).

NOTE! For now it only prunes _merge_ commits, and follows the parent where
there are no differences in the set of files specified. In the long run,
I'd like to make it re-write the straight-line history too, but for now
the merge simplification is much more fundamentally important (the
rewriting of straight-line history is largely a separate simplification
phase, but the merge simplification needs to happen early if we want to
optimize away unnecessary commit parsing).

If all parents of a merge change some of the files, the merge is left as
is, so the end result is in no way guaranteed to be a linear history, but
it will often be a lot /more/ linear than the full tree, since it prunes
out parents that didn't matter for that set of files.

As an example from the current kernel:

[torvalds@g5 linux]$ git-rev-list HEAD | wc -l
9885
[torvalds@g5 linux]$ git-rev-list HEAD -- Makefile | wc -l
4084
[torvalds@g5 linux]$ git-rev-list HEAD -- drivers/usb | wc -l
5206

and you can also use 'gitk' to more visually see the pruning of the
history tree, with something like

gitk -- drivers/usb

showing a simplified history that tries to follow the first parent in a
merge that is the parent that fully defines drivers/usb/.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
rev-list.c

index 3a32e40..b5dbb9f 100644 (file)
@@ -5,6 +5,7 @@
 #include "tree.h"
 #include "blob.h"
 #include "epoch.h"
+#include "diff.h"
 
 #define SEEN           (1u << 0)
 #define INTERESTING    (1u << 1)
@@ -44,6 +45,7 @@ static int show_breaks = 0;
 static int stop_traversal = 0;
 static int topo_order = 0;
 static int no_merges = 0;
+static const char **paths = NULL;
 
 static void show_commit(struct commit *commit)
 {
@@ -377,18 +379,129 @@ static void mark_edges_uninteresting(struct commit_list *list)
        }
 }
 
+static int is_different = 0;
+
+static void file_add_remove(struct diff_options *options,
+                   int addremove, unsigned mode,
+                   const unsigned char *sha1,
+                   const char *base, const char *path)
+{
+       is_different = 1;
+}
+
+static void file_change(struct diff_options *options,
+                unsigned old_mode, unsigned new_mode,
+                const unsigned char *old_sha1,
+                const unsigned char *new_sha1,
+                const char *base, const char *path)
+{
+       is_different = 1;
+}
+
+static struct diff_options diff_opt = {
+       .recursive = 1,
+       .add_remove = file_add_remove,
+       .change = file_change,
+};
+
+static struct commit *try_to_simplify_merge(struct commit *commit, struct commit_list *parent)
+{
+       if (!commit->tree)
+               return NULL;
+
+       while (parent) {
+               struct commit *p = parent->item;
+               parent = parent->next;
+               parse_commit(p);
+               if (!p->tree)
+                       continue;
+               is_different = 0;
+               if (diff_tree_sha1(commit->tree->object.sha1,
+                                  p->tree->object.sha1, "", &diff_opt) < 0)
+                       continue;
+               if (!is_different)
+                       return p;
+       }
+       return NULL;
+}
+
+static void add_parents_to_list(struct commit *commit, struct commit_list **list)
+{
+       struct commit_list *parent = commit->parents;
+
+       /*
+        * If the commit is uninteresting, don't try to
+        * prune parents - we want the maximal uninteresting
+        * set.
+        *
+        * Normally we haven't parsed the parent
+        * yet, so we won't have a parent of a parent
+        * here. However, it may turn out that we've
+        * reached this commit some other way (where it
+        * wasn't uninteresting), in which case we need
+        * to mark its parents recursively too..
+        */
+       if (commit->object.flags & UNINTERESTING) {
+               while (parent) {
+                       struct commit *p = parent->item;
+                       parent = parent->next;
+                       parse_commit(p);
+                       p->object.flags |= UNINTERESTING;
+                       if (p->parents)
+                               mark_parents_uninteresting(p);
+                       if (p->object.flags & SEEN)
+                               continue;
+                       p->object.flags |= SEEN;
+                       insert_by_date(p, list);
+               }
+               return;
+       }
+
+       /*
+        * Ok, the commit wasn't uninteresting. If it
+        * is a merge, try to find the parent that has
+        * no differences in the path set if one exists.
+        */
+       if (paths && parent && parent->next) {
+               struct commit *preferred;
+
+               preferred = try_to_simplify_merge(commit, parent);
+               if (preferred) {
+                       parent->item = preferred;
+                       parent->next = NULL;
+               }
+       }
+
+       while (parent) {
+               struct commit *p = parent->item;
+
+               parent = parent->next;
+
+               parse_commit(p);
+               if (p->object.flags & SEEN)
+                       continue;
+               p->object.flags |= SEEN;
+               insert_by_date(p, list);
+       }
+}
+
 static struct commit_list *limit_list(struct commit_list *list)
 {
        struct commit_list *newlist = NULL;
        struct commit_list **p = &newlist;
        while (list) {
-               struct commit *commit = pop_most_recent_commit(&list, SEEN);
+               struct commit_list *entry = list;
+               struct commit *commit = list->item;
                struct object *obj = &commit->object;
 
+               list = list->next;
+               free(entry);
+
                if (max_age != -1 && (commit->date < max_age))
                        obj->flags |= UNINTERESTING;
                if (unpacked && has_sha1_pack(obj->sha1))
                        obj->flags |= UNINTERESTING;
+               add_parents_to_list(commit, &list);
                if (obj->flags & UNINTERESTING) {
                        mark_parents_uninteresting(commit);
                        if (everybody_uninteresting(list))
@@ -507,15 +620,15 @@ static void handle_all(struct commit_list **lst)
        global_lst = NULL;
 }
 
-int main(int argc, char **argv)
+int main(int argc, const char **argv)
 {
+       const char *prefix = setup_git_directory();
        struct commit_list *list = NULL;
        int i, limited = 0;
 
-       setup_git_directory();
        for (i = 1 ; i < argc; i++) {
                int flags;
-               char *arg = argv[i];
+               const char *arg = argv[i];
                char *dotdot;
                struct commit *commit;
 
@@ -587,6 +700,14 @@ int main(int argc, char **argv)
                        limited = 1;
                        continue;
                }
+               if (!strcmp(arg, "--")) {
+                       paths = get_pathspec(prefix, argv + i + 1);
+                       if (paths) {
+                               limited = 1;
+                               diff_tree_setup_paths(paths);
+                       }
+                       break;
+               }
 
                if (show_breaks && !merge_order)
                        usage(rev_list_usage);