rev-list: stop when the file disappears
authorLinus Torvalds <torvalds@osdl.org>
Wed, 18 Jan 2006 22:47:30 +0000 (14:47 -0800)
committerJunio C Hamano <junkio@cox.net>
Sat, 28 Jan 2006 08:08:38 +0000 (00:08 -0800)
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like

git-rev-list --remove-empty --parents HEAD -- "$filename"

and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).

It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.

The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.

The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.

As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).

With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).

With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:

push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")

pseudo-parents($i) = git-rev-list parents for that line

if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}

/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}

which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).

Anybody want to try?

Linus

rev-list.c

index e00e6fc..7d3ddc6 100644 (file)
@@ -54,6 +54,7 @@ static int stop_traversal = 0;
 static int topo_order = 0;
 static int no_merges = 0;
 static const char **paths = NULL;
+static int remove_empty_trees = 0;
 
 static void show_commit(struct commit *commit)
 {
@@ -424,14 +425,33 @@ static void mark_edges_uninteresting(struct commit_list *list)
        }
 }
 
-static int is_different = 0;
+#define TREE_SAME      0
+#define TREE_NEW       1
+#define TREE_DIFFERENT 2
+static int tree_difference = TREE_SAME;
 
 static void file_add_remove(struct diff_options *options,
                    int addremove, unsigned mode,
                    const unsigned char *sha1,
                    const char *base, const char *path)
 {
-       is_different = 1;
+       int diff = TREE_DIFFERENT;
+
+       /*
+        * Is it an add of a new file? It means that
+        * the old tree didn't have it at all, so we
+        * will turn "TREE_SAME" -> "TREE_NEW", but
+        * leave any "TREE_DIFFERENT" alone (and if
+        * it already was "TREE_NEW", we'll keep it
+        * "TREE_NEW" of course).
+        */
+       if (addremove == '+') {
+               diff = tree_difference;
+               if (diff != TREE_SAME)
+                       return;
+               diff = TREE_NEW;
+       }
+       tree_difference = diff;
 }
 
 static void file_change(struct diff_options *options,
@@ -440,7 +460,7 @@ static void file_change(struct diff_options *options,
                 const unsigned char *new_sha1,
                 const char *base, const char *path)
 {
-       is_different = 1;
+       tree_difference = TREE_DIFFERENT;
 }
 
 static struct diff_options diff_opt = {
@@ -449,12 +469,16 @@ static struct diff_options diff_opt = {
        .change = file_change,
 };
 
-static int same_tree(struct tree *t1, struct tree *t2)
+static int compare_tree(struct tree *t1, struct tree *t2)
 {
-       is_different = 0;
+       if (!t1)
+               return TREE_NEW;
+       if (!t2)
+               return TREE_DIFFERENT;
+       tree_difference = TREE_SAME;
        if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0)
-               return 0;
-       return !is_different;
+               return TREE_DIFFERENT;
+       return tree_difference;
 }
 
 static int same_tree_as_empty(struct tree *t1)
@@ -474,28 +498,55 @@ static int same_tree_as_empty(struct tree *t1)
        empty.buf = "";
        empty.size = 0;
 
-       is_different = 0;
+       tree_difference = 0;
        retval = diff_tree(&empty, &real, "", &diff_opt);
        free(tree);
 
-       return retval >= 0 && !is_different;
+       return retval >= 0 && !tree_difference;
 }
 
-static struct commit *try_to_simplify_merge(struct commit *commit, struct commit_list *parent)
+static void try_to_simplify_commit(struct commit *commit)
 {
+       struct commit_list **pp, *parent;
+
        if (!commit->tree)
-               return NULL;
+               return;
 
-       while (parent) {
+       if (!commit->parents) {
+               if (!same_tree_as_empty(commit->tree))
+                       commit->object.flags |= TREECHANGE;
+               return;
+       }
+
+       pp = &commit->parents;
+       while ((parent = *pp) != NULL) {
                struct commit *p = parent->item;
-               parent = parent->next;
+
+               if (p->object.flags & UNINTERESTING) {
+                       pp = &parent->next;
+                       continue;
+               }
+
                parse_commit(p);
-               if (!p->tree)
+               switch (compare_tree(p->tree, commit->tree)) {
+               case TREE_SAME:
+                       parent->next = NULL;
+                       commit->parents = parent;
+                       return;
+
+               case TREE_NEW:
+                       if (remove_empty_trees && same_tree_as_empty(p->tree)) {
+                               *pp = parent->next;
+                               continue;
+                       }
+               /* fallthrough */
+               case TREE_DIFFERENT:
+                       pp = &parent->next;
                        continue;
-               if (same_tree(commit->tree, p->tree))
-                       return p;
+               }
+               die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
        }
-       return NULL;
+       commit->object.flags |= TREECHANGE;
 }
 
 static void add_parents_to_list(struct commit *commit, struct commit_list **list)
@@ -531,20 +582,14 @@ static void add_parents_to_list(struct commit *commit, struct commit_list **list
        }
 
        /*
-        * Ok, the commit wasn't uninteresting. If it
-        * is a merge, try to find the parent that has
-        * no differences in the path set if one exists.
+        * Ok, the commit wasn't uninteresting. Try to
+        * simplify the commit history and find the parent
+        * that has no differences in the path set if one exists.
         */
-       if (paths && parent && parent->next) {
-               struct commit *preferred;
-
-               preferred = try_to_simplify_merge(commit, parent);
-               if (preferred) {
-                       parent->item = preferred;
-                       parent->next = NULL;
-               }
-       }
+       if (paths)
+               try_to_simplify_commit(commit);
 
+       parent = commit->parents;
        while (parent) {
                struct commit *p = parent->item;
 
@@ -558,33 +603,6 @@ static void add_parents_to_list(struct commit *commit, struct commit_list **list
        }
 }
 
-static void compress_list(struct commit_list *list)
-{
-       while (list) {
-               struct commit *commit = list->item;
-               struct commit_list *parent = commit->parents;
-               list = list->next;
-
-               if (!parent) {
-                       if (!same_tree_as_empty(commit->tree))
-                               commit->object.flags |= TREECHANGE;
-                       continue;
-               }
-
-               /*
-                * Exactly one parent? Check if it leaves the tree
-                * unchanged
-                */
-               if (!parent->next) {
-                       struct tree *t1 = commit->tree;
-                       struct tree *t2 = parent->item->tree;
-                       if (!t1 || !t2 || same_tree(t1, t2))
-                               continue;
-               }
-               commit->object.flags |= TREECHANGE;
-       }
-}
-
 static struct commit_list *limit_list(struct commit_list *list)
 {
        struct commit_list *newlist = NULL;
@@ -614,8 +632,6 @@ static struct commit_list *limit_list(struct commit_list *list)
        }
        if (tree_objects)
                mark_edges_uninteresting(newlist);
-       if (paths && dense)
-               compress_list(newlist);
        if (bisect_list)
                newlist = find_bisection(newlist);
        return newlist;
@@ -808,6 +824,10 @@ int main(int argc, const char **argv)
                        dense = 0;
                        continue;
                }
+               if (!strcmp(arg, "--remove-empty")) {
+                       remove_empty_trees = 1;
+                       continue;
+               }
                if (!strcmp(arg, "--")) {
                        i++;
                        break;