contrib/git-svn: stabilize memory usage for big fetches
[git.git] / contrib / git-svn / git-svn.perl
index f3fc3ec..3e5733e 100755 (executable)
@@ -8,7 +8,7 @@ use vars qw/    $AUTHOR $VERSION
                $GIT_SVN_INDEX $GIT_SVN
                $GIT_DIR $REV_DIR/;
 $AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
-$VERSION = '0.10.0';
+$VERSION = '0.11.0';
 $GIT_DIR = $ENV{GIT_DIR} || "$ENV{PWD}/.git";
 # make sure the svn binary gives consistent output between locales and TZs:
 $ENV{TZ} = 'UTC';
@@ -217,9 +217,8 @@ sub fetch {
        push @log_args, '--stop-on-copy' unless $_no_stop_copy;
 
        my $svn_log = svn_log_raw(@log_args);
-       @$svn_log = sort { $a->{revision} <=> $b->{revision} } @$svn_log;
 
-       my $base = shift @$svn_log or croak "No base revision!\n";
+       my $base = next_log_entry($svn_log) or croak "No base revision!\n";
        my $last_commit = undef;
        unless (-d $SVN_WC) {
                svn_cmd_checkout($SVN_URL,$base->{revision},$SVN_WC);
@@ -234,18 +233,22 @@ sub fetch {
        }
        my @svn_up = qw(svn up);
        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
-       my $last_rev = $base->{revision};
-       foreach my $log_msg (@$svn_log) {
-               assert_svn_wc_clean($last_rev, $last_commit);
-               $last_rev = $log_msg->{revision};
-               sys(@svn_up,"-r$last_rev");
+       my $last = $base;
+       while (my $log_msg = next_log_entry($svn_log)) {
+               assert_svn_wc_clean($last->{revision}, $last_commit);
+               if ($last->{revision} >= $log_msg->{revision}) {
+                       croak "Out of order: last >= current: ",
+                               "$last->{revision} >= $log_msg->{revision}\n";
+               }
+               sys(@svn_up,"-r$log_msg->{revision}");
                $last_commit = git_commit($log_msg, $last_commit, @parents);
+               $last = $log_msg;
        }
-       assert_svn_wc_clean($last_rev, $last_commit);
+       assert_svn_wc_clean($last->{revision}, $last_commit);
        unless (-e "$GIT_DIR/refs/heads/master") {
                sys(qw(git-update-ref refs/heads/master),$last_commit);
        }
-       return pop @$svn_log;
+       return $last;
 }
 
 sub commit {
@@ -708,49 +711,61 @@ sub svn_commit_tree {
        return fetch("$rev_committed=$commit")->{revision};
 }
 
+# read the entire log into a temporary file (which is removed ASAP)
+# and return a hashref holding the file handle + parser state
 sub svn_log_raw {
        my (@log_args) = @_;
-       my $pid = open my $log_fh,'-|';
+       my $log_fh = IO::File->new_tmpfile or croak $!;
+       my $pid = fork;
        defined $pid or croak $!;
-
-       if ($pid == 0) {
+       if (!$pid) {
+               open STDOUT, '>&', $log_fh or croak $!;
                exec (qw(svn log), @log_args) or croak $!
        }
+       waitpid $pid, 0;
+       croak if $?;
+       seek $log_fh, 0, 0 or croak $!;
+       return { state => 'sep', fh => $log_fh };
+}
+
+sub next_log_entry {
+       my $log = shift; # retval of svn_log_raw()
+       my $ret = undef;
+       my $fh = $log->{fh};
 
-       my @svn_log;
-       my $state = 'sep';
-       while (<$log_fh>) {
+       while (<$fh>) {
                chomp;
                if (/^\-{72}$/) {
-                       if ($state eq 'msg') {
-                               if ($svn_log[$#svn_log]->{lines}) {
-                                       $svn_log[$#svn_log]->{msg} .= $_."\n";
-                                       unless(--$svn_log[$#svn_log]->{lines}) {
-                                               $state = 'sep';
+                       if ($log->{state} eq 'msg') {
+                               if ($ret->{lines}) {
+                                       $ret->{msg} .= $_."\n";
+                                       unless(--$ret->{lines}) {
+                                               $log->{state} = 'sep';
                                        }
                                } else {
                                        croak "Log parse error at: $_\n",
-                                               $svn_log[$#svn_log]->{revision},
+                                               $ret->{revision},
                                                "\n";
                                }
                                next;
                        }
-                       if ($state ne 'sep') {
+                       if ($log->{state} ne 'sep') {
                                croak "Log parse error at: $_\n",
-                                       "state: $state\n",
-                                       $svn_log[$#svn_log]->{revision},
+                                       "state: $log->{state}\n",
+                                       $ret->{revision},
                                        "\n";
                        }
-                       $state = 'rev';
+                       $log->{state} = 'rev';
 
                        # if we have an empty log message, put something there:
-                       if (@svn_log) {
-                               $svn_log[$#svn_log]->{msg} ||= "\n";
-                               delete $svn_log[$#svn_log]->{lines};
+                       if ($ret) {
+                               $ret->{msg} ||= "\n";
+                               delete $ret->{lines};
+                               return $ret;
                        }
                        next;
                }
-               if ($state eq 'rev' && s/^r(\d+)\s*\|\s*//) {
+               if ($log->{state} eq 'rev' && s/^r(\d+)\s*\|\s*//) {
                        my $rev = $1;
                        my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3);
                        ($lines) = ($lines =~ /(\d+)/);
@@ -758,36 +773,34 @@ sub svn_log_raw {
                                        /(\d{4})\-(\d\d)\-(\d\d)\s
                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
                                         or croak "Failed to parse date: $date\n";
-                       my %log_msg = ( revision => $rev,
+                       $ret = {        revision => $rev,
                                        date => "$tz $Y-$m-$d $H:$M:$S",
                                        author => $author,
                                        lines => $lines,
-                                       msg => '' );
+                                       msg => '' };
                        if (defined $_authors && ! defined $users{$author}) {
                                die "Author: $author not defined in ",
                                                "$_authors file\n";
                        }
-                       push @svn_log, \%log_msg;
-                       $state = 'msg_start';
+                       $log->{state} = 'msg_start';
                        next;
                }
                # skip the first blank line of the message:
-               if ($state eq 'msg_start' && /^$/) {
-                       $state = 'msg';
-               } elsif ($state eq 'msg') {
-                       if ($svn_log[$#svn_log]->{lines}) {
-                               $svn_log[$#svn_log]->{msg} .= $_."\n";
-                               unless (--$svn_log[$#svn_log]->{lines}) {
-                                       $state = 'sep';
+               if ($log->{state} eq 'msg_start' && /^$/) {
+                       $log->{state} = 'msg';
+               } elsif ($log->{state} eq 'msg') {
+                       if ($ret->{lines}) {
+                               $ret->{msg} .= $_."\n";
+                               unless (--$ret->{lines}) {
+                                       $log->{state} = 'sep';
                                }
                        } else {
                                croak "Log parse error at: $_\n",
-                                       $svn_log[$#svn_log]->{revision},"\n";
+                                       $ret->{revision},"\n";
                        }
                }
        }
-       close $log_fh or croak $?;
-       return \@svn_log;
+       return $ret;
 }
 
 sub svn_info {
@@ -1114,9 +1127,13 @@ __END__
 
 Data structures:
 
-@svn_log = array of log_msg hashes
+$svn_log hashref (as returned by svn_log_raw)
+{
+       fh => file handle of the log file,
+       state => current state of the log parser (sep, rev, msg_start or msg)
+}
 
-$log_msg hash
+$log_msg hashref as returned by next_log_entry($svn_log)
 {
        msg => 'whitespace-formatted log entry
 ',                                             # trailing newline is preserved