From: Eric Wong Date: Sun, 26 Mar 2006 02:52:31 +0000 (-0800) Subject: contrib/git-svn: stabilize memory usage for big fetches X-Git-Tag: v1.3.0-rc1~1^2~2 X-Git-Url: https://git.octo.it/?p=git.git;a=commitdiff_plain;h=03823184247215fba1dd9c9c39659d08dea3bad7 contrib/git-svn: stabilize memory usage for big fetches We should be safely able to import histories with thousands of revisions without hogging up lots of memory. With this, we lose the ability to autocorrect mistakes when people specify revisions in reverse, but it's probably no longer a problem since we only have one method of log parsing nowadays. I've added an extra check to ensure that revision numbers do increment. Also, increment the version number to 0.11.0. I really should just call it 1.0 soon... Signed-off-by: Eric Wong Signed-off-by: Junio C Hamano --- diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl index f3fc3ec1..3e5733ee 100755 --- a/contrib/git-svn/git-svn.perl +++ b/contrib/git-svn/git-svn.perl @@ -8,7 +8,7 @@ use vars qw/ $AUTHOR $VERSION $GIT_SVN_INDEX $GIT_SVN $GIT_DIR $REV_DIR/; $AUTHOR = 'Eric Wong '; -$VERSION = '0.10.0'; +$VERSION = '0.11.0'; $GIT_DIR = $ENV{GIT_DIR} || "$ENV{PWD}/.git"; # make sure the svn binary gives consistent output between locales and TZs: $ENV{TZ} = 'UTC'; @@ -217,9 +217,8 @@ sub fetch { push @log_args, '--stop-on-copy' unless $_no_stop_copy; my $svn_log = svn_log_raw(@log_args); - @$svn_log = sort { $a->{revision} <=> $b->{revision} } @$svn_log; - my $base = shift @$svn_log or croak "No base revision!\n"; + my $base = next_log_entry($svn_log) or croak "No base revision!\n"; my $last_commit = undef; unless (-d $SVN_WC) { svn_cmd_checkout($SVN_URL,$base->{revision},$SVN_WC); @@ -234,18 +233,22 @@ sub fetch { } my @svn_up = qw(svn up); push @svn_up, '--ignore-externals' unless $_no_ignore_ext; - my $last_rev = $base->{revision}; - foreach my $log_msg (@$svn_log) { - assert_svn_wc_clean($last_rev, $last_commit); - $last_rev = $log_msg->{revision}; - sys(@svn_up,"-r$last_rev"); + my $last = $base; + while (my $log_msg = next_log_entry($svn_log)) { + assert_svn_wc_clean($last->{revision}, $last_commit); + if ($last->{revision} >= $log_msg->{revision}) { + croak "Out of order: last >= current: ", + "$last->{revision} >= $log_msg->{revision}\n"; + } + sys(@svn_up,"-r$log_msg->{revision}"); $last_commit = git_commit($log_msg, $last_commit, @parents); + $last = $log_msg; } - assert_svn_wc_clean($last_rev, $last_commit); + assert_svn_wc_clean($last->{revision}, $last_commit); unless (-e "$GIT_DIR/refs/heads/master") { sys(qw(git-update-ref refs/heads/master),$last_commit); } - return pop @$svn_log; + return $last; } sub commit { @@ -708,49 +711,61 @@ sub svn_commit_tree { return fetch("$rev_committed=$commit")->{revision}; } +# read the entire log into a temporary file (which is removed ASAP) +# and store the file handle + parser state sub svn_log_raw { my (@log_args) = @_; - my $pid = open my $log_fh,'-|'; + my $log_fh = IO::File->new_tmpfile or croak $!; + my $pid = fork; defined $pid or croak $!; - - if ($pid == 0) { + if (!$pid) { + open STDOUT, '>&', $log_fh or croak $!; exec (qw(svn log), @log_args) or croak $! } + waitpid $pid, 0; + croak if $?; + seek $log_fh, 0, 0 or croak $!; + return { state => 'sep', fh => $log_fh }; +} + +sub next_log_entry { + my $log = shift; # retval of svn_log_raw() + my $ret = undef; + my $fh = $log->{fh}; - my @svn_log; - my $state = 'sep'; - while (<$log_fh>) { + while (<$fh>) { chomp; if (/^\-{72}$/) { - if ($state eq 'msg') { - if ($svn_log[$#svn_log]->{lines}) { - $svn_log[$#svn_log]->{msg} .= $_."\n"; - unless(--$svn_log[$#svn_log]->{lines}) { - $state = 'sep'; + if ($log->{state} eq 'msg') { + if ($ret->{lines}) { + $ret->{msg} .= $_."\n"; + unless(--$ret->{lines}) { + $log->{state} = 'sep'; } } else { croak "Log parse error at: $_\n", - $svn_log[$#svn_log]->{revision}, + $ret->{revision}, "\n"; } next; } - if ($state ne 'sep') { + if ($log->{state} ne 'sep') { croak "Log parse error at: $_\n", - "state: $state\n", - $svn_log[$#svn_log]->{revision}, + "state: $log->{state}\n", + $ret->{revision}, "\n"; } - $state = 'rev'; + $log->{state} = 'rev'; # if we have an empty log message, put something there: - if (@svn_log) { - $svn_log[$#svn_log]->{msg} ||= "\n"; - delete $svn_log[$#svn_log]->{lines}; + if ($ret) { + $ret->{msg} ||= "\n"; + delete $ret->{lines}; + return $ret; } next; } - if ($state eq 'rev' && s/^r(\d+)\s*\|\s*//) { + if ($log->{state} eq 'rev' && s/^r(\d+)\s*\|\s*//) { my $rev = $1; my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3); ($lines) = ($lines =~ /(\d+)/); @@ -758,36 +773,34 @@ sub svn_log_raw { /(\d{4})\-(\d\d)\-(\d\d)\s (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x) or croak "Failed to parse date: $date\n"; - my %log_msg = ( revision => $rev, + $ret = { revision => $rev, date => "$tz $Y-$m-$d $H:$M:$S", author => $author, lines => $lines, - msg => '' ); + msg => '' }; if (defined $_authors && ! defined $users{$author}) { die "Author: $author not defined in ", "$_authors file\n"; } - push @svn_log, \%log_msg; - $state = 'msg_start'; + $log->{state} = 'msg_start'; next; } # skip the first blank line of the message: - if ($state eq 'msg_start' && /^$/) { - $state = 'msg'; - } elsif ($state eq 'msg') { - if ($svn_log[$#svn_log]->{lines}) { - $svn_log[$#svn_log]->{msg} .= $_."\n"; - unless (--$svn_log[$#svn_log]->{lines}) { - $state = 'sep'; + if ($log->{state} eq 'msg_start' && /^$/) { + $log->{state} = 'msg'; + } elsif ($log->{state} eq 'msg') { + if ($ret->{lines}) { + $ret->{msg} .= $_."\n"; + unless (--$ret->{lines}) { + $log->{state} = 'sep'; } } else { croak "Log parse error at: $_\n", - $svn_log[$#svn_log]->{revision},"\n"; + $ret->{revision},"\n"; } } } - close $log_fh or croak $?; - return \@svn_log; + return $ret; } sub svn_info { @@ -1114,9 +1127,13 @@ __END__ Data structures: -@svn_log = array of log_msg hashes +$svn_log hashref (as returned by svn_log_raw) +{ + fh => file handle of the log file, + state => state of the log file parser (sep/msg/rev/msg_start...) +} -$log_msg hash +$log_msg hashref as returned by next_log_entry($svn_log) { msg => 'whitespace-formatted log entry ', # trailing newline is preserved