X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=git-svnimport.perl;h=cb241d1b5174fad6b6eb081635e614d251688f0d;hb=1e80e0449248edb77b0fb9853f4a3404a599e207;hp=821f51f105903c5241dcf8a55092dc6ed3fc76b3;hpb=16e685967d638fd50860fa24bd5f2d06ab4f8e96;p=git.git diff --git a/git-svnimport.perl b/git-svnimport.perl index 821f51f1..cb241d1b 100755 --- a/git-svnimport.perl +++ b/git-svnimport.perl @@ -5,13 +5,12 @@ # # The basic idea is to pull and analyze SVN changes. # -# Checking out the files is done by a single long-running CVS connection -# / server process. +# Checking out the files is done by a single long-running SVN connection. # # The head revision is on branch "origin" by default. # You can change that with the '-o' option. -require v5.8.0; # for shell-safe open("-|",LIST) +require 5.008; # for shell-safe open("-|",LIST) use strict; use warnings; use Getopt::Std; @@ -26,49 +25,39 @@ use IPC::Open2; use SVN::Core; use SVN::Ra; -die "Need CVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1"; +die "Need SVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1"; $SIG{'PIPE'}="IGNORE"; $ENV{'TZ'}="UTC"; -our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s,$opt_l); +our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s,$opt_l,$opt_d,$opt_D); sub usage() { print STDERR <; - chomp $cvs_tree; - close $f; -} else { - usage(); -} +my $svn_url = $ARGV[0]; +my $svn_dir = $ARGV[1]; our @mergerx = (); if ($opt_m) { @@ -117,30 +106,47 @@ sub conn { sub file { my($self,$path,$rev) = @_; - my $res; - my ($fh, $name) = tempfile('gitsvn.XXXXXX', + my ($fh, $name) = tempfile('gitsvn.XXXXXX', DIR => File::Spec->tmpdir(), UNLINK => 1); print "... $rev $path ...\n" if $opt_v; - eval { $self->{'svn'}->get_file($path,$rev,$fh); }; - if ($@ and $@ !~ /Attempted to get checksum/) { - # retry - $self->conn(); - eval { $self->{'svn'}->get_file($path,$rev,$fh); }; - }; - return () if $@ and $@ !~ /Attempted to get checksum/; - die $@ if $@; + my $pool = SVN::Pool->new(); + eval { $self->{'svn'}->get_file($path,$rev,$fh,$pool); }; + $pool->clear; + if($@) { + return undef if $@ =~ /Attempted to get checksum/; + die $@; + } close ($fh); - return ($name, $res); + return $name; } - package main; - -my $svn = SVNconn->new($cvs_tree); - +use URI; + +our $svn = $svn_url; +$svn .= "/$svn_dir" if defined $svn_dir; +my $svn2 = SVNconn->new($svn); +$svn = SVNconn->new($svn); + +my $lwp_ua; +if($opt_d or $opt_D) { + $svn_url = URI->new($svn_url)->canonical; + if($opt_D) { + $svn_dir =~ s#/*$#/#; + } else { + $svn_dir = ""; + } + if ($svn_url->scheme eq "http") { + use LWP::UserAgent; + $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []); + } else { + print STDERR "Warning: not HTTP; turning off direct file access\n"; + $opt_d=0; + } +} sub pdate($) { my($d) = @_; @@ -159,9 +165,9 @@ sub getwd() { sub get_headref($$) { my $name = shift; - my $git_dir = shift; + my $git_dir = shift; my $sha; - + if (open(C,"$git_dir/refs/heads/$name")) { chomp($sha = ); close(C); @@ -193,7 +199,7 @@ $ENV{GIT_INDEX_FILE} = $git_index; my $maxnum = 0; my $last_rev = ""; my $last_branch; -my $current_rev = $opt_s ? ($opt_s-1) : 0; +my $current_rev = $opt_s || 1; unless(-d $git_dir) { system("git-init-db"); die "Cannot init the GIT db at $git_tree: $?\n" if $?; @@ -211,7 +217,11 @@ unless(-d $git_dir) { -f "$git_dir/svn2git" or die "'$git_dir/svn2git' does not exist.\n". "You need that file for incremental imports.\n"; - $last_branch = basename(readlink("$git_dir/HEAD")); + open(F, "git-symbolic-ref HEAD |") or + die "Cannot run git-symbolic-ref: $!\n"; + chomp ($last_branch = ); + $last_branch = basename($last_branch); + close(F); unless($last_branch) { warn "Cannot read the last branch name: $! -- assuming 'master'\n"; $last_branch = "master"; @@ -231,7 +241,7 @@ EOM $forward_master = $opt_o ne 'master' && -f "$git_dir/refs/heads/master" && - system('cmp', '-s', "$git_dir/refs/heads/master", + system('cmp', '-s', "$git_dir/refs/heads/master", "$git_dir/refs/heads/$opt_o") == 0; # populate index @@ -245,7 +255,7 @@ EOM my($num,$branch,$ref) = split; $branches{$branch}{$num} = $ref; $branches{$branch}{"LAST"} = $ref; - $current_rev = $num if $current_rev < $num; + $current_rev = $num+1 if $current_rev <= $num; } close($B); } @@ -254,42 +264,61 @@ EOM open BRANCHES,">>", "$git_dir/svn2git"; +sub node_kind($$$) { + my ($branch, $path, $revision) = @_; + my $pool=SVN::Pool->new; + my $kind = $svn->{'svn'}->check_path(revert_split_path($branch,$path),$revision,$pool); + $pool->clear; + return $kind; +} -## cvsps output: -#--------------------- -#PatchSet 314 -#Date: 1999/09/18 13:03:59 -#Author: wkoch -#Branch: STABLE-BRANCH-1-0 -#Ancestor branch: HEAD -#Tag: (none) -#Log: -# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch -#Members: -# README:1.57->1.57.2.1 -# VERSION:1.96->1.96.2.1 -# -#--------------------- - -my $state = 0; - -sub get_file($$$) { - my($rev,$branch,$path) = @_; +sub revert_split_path($$) { + my($branch,$path) = @_; - # revert split_path(), below my $svnpath; $path = "" if $path eq "/"; # this should not happen, but ... if($branch eq "/") { - $svnpath = "/$trunk_name/$path"; + $svnpath = "$trunk_name/$path"; } elsif($branch =~ m#^/#) { - $svnpath = "/$tag_name$branch/$path"; + $svnpath = "$tag_name$branch/$path"; } else { - $svnpath = "/$branch_name/$branch/$path"; + $svnpath = "$branch_name/$branch/$path"; } + $svnpath =~ s#/+$##; + return $svnpath; +} + +sub get_file($$$) { + my($rev,$branch,$path) = @_; + + my $svnpath = revert_split_path($branch,$path); + # now get it - my ($name, $res) = eval { $svn->file($svnpath,$rev); }; - return () unless defined $name; + my $name; + if($opt_d) { + my($req,$res); + + # /svn/!svn/bc/2/django/trunk/django-docs/build.py + my $url=$svn_url->clone(); + $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath"); + print "... $path...\n" if $opt_v; + $req = HTTP::Request->new(GET => $url); + $res = $lwp_ua->request($req); + if ($res->is_success) { + my $fh; + ($fh, $name) = tempfile('gitsvn.XXXXXX', + DIR => File::Spec->tmpdir(), UNLINK => 1); + print $fh $res->content; + close($fh) or die "Could not write $name: $!\n"; + } else { + return undef if $res->code == 301; # directory? + die $res->status_line." at $url\n"; + } + } else { + $name = $svn->file("/$svnpath",$rev); + return undef unless defined $name; + } open my $F, '-|', "git-hash-object", "-w", $name or die "Cannot create object: $!\n"; @@ -312,17 +341,83 @@ sub split_path($$) { } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) { $branch = $1; } else { - print STDERR "$rev: Unrecognized path: $path\n"; + my %no_error = ( + "/" => 1, + "/$tag_name" => 1, + "/$branch_name" => 1 + ); + print STDERR "$rev: Unrecognized path: $path\n" unless (defined $no_error{$path}); return () } $path = "/" if $path eq ""; return ($branch,$path); } +sub branch_rev($$) { + + my ($srcbranch,$uptorev) = @_; + + my $bbranches = $branches{$srcbranch}; + my @revs = reverse sort { ($a eq 'LAST' ? 0 : $a) <=> ($b eq 'LAST' ? 0 : $b) } keys %$bbranches; + my $therev; + foreach my $arev(@revs) { + next if ($arev eq 'LAST'); + if ($arev <= $uptorev) { + $therev = $arev; + last; + } + } + return $therev; +} + +sub copy_path($$$$$$$$) { + # Somebody copied a whole subdirectory. + # We need to find the index entries from the old version which the + # SVN log entry points to, and add them to the new place. + + my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_; + + my($srcbranch,$srcpath) = split_path($rev,$oldpath); + unless(defined $srcbranch) { + print "Path not found when copying from $oldpath @ $rev\n"; + return; + } + my $therev = branch_rev($srcbranch, $rev); + my $gitrev = $branches{$srcbranch}{$therev}; + unless($gitrev) { + print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n"; + return; + } + if ($srcbranch ne $newbranch) { + push(@$parents, $branches{$srcbranch}{'LAST'}); + } + print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v; + if ($node_kind eq $SVN::Node::dir) { + $srcpath =~ s#/*$#/#; + } + + open my $f,"-|","git-ls-tree","-r","-z",$gitrev,$srcpath; + local $/ = "\0"; + while(<$f>) { + chomp; + my($m,$p) = split(/\t/,$_,2); + my($mode,$type,$sha1) = split(/ /,$m); + next if $type ne "blob"; + if ($node_kind eq $SVN::Node::dir) { + $p = $path . substr($p,length($srcpath)-1); + } else { + $p = $path; + } + push(@$new,[$mode,$sha1,$p]); + } + close($f) or + print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n"; +} + sub commit { my($branch, $changed_paths, $revision, $author, $date, $message) = @_; my($author_name,$author_email,$dest); - my(@old,@new); + my(@old,@new,@parents); if (not defined $author) { $author_name = $author_email = "unknown"; @@ -409,33 +504,40 @@ sub commit { $last_rev = $rev; } + push (@parents, $rev) if defined $rev; + my $cid; if($tag and not %$changed_paths) { $cid = $rev; } else { - while(my($path,$action) = each %$changed_paths) { - if ($action->[0] eq "A") { - my $f = get_file($revision,$branch,$path); - push(@new,$f) if $f; + my @paths = sort keys %$changed_paths; + foreach my $path(@paths) { + my $action = $changed_paths->{$path}; + + if ($action->[0] eq "R") { + # refer to a file/tree in an earlier commit + push(@old,$path); # remove any old stuff + } + if(($action->[0] eq "A") || ($action->[0] eq "R")) { + my $node_kind = node_kind($branch,$path,$revision); + if($action->[1]) { + copy_path($revision,$branch,$path,$action->[1],$action->[2],$node_kind,\@new,\@parents); + } elsif ($node_kind eq $SVN::Node::file) { + my $f = get_file($revision,$branch,$path); + if ($f) { + push(@new,$f) if $f; + } else { + my $opath = $action->[3]; + print STDERR "$revision: $branch: could not fetch '$opath'\n"; + } + } } elsif ($action->[0] eq "D") { push(@old,$path); } elsif ($action->[0] eq "M") { - my $f = get_file($revision,$branch,$path); - push(@new,$f) if $f; - } elsif ($action->[0] eq "R") { - # refer to a file/tree in an earlier commit - push(@old,$path); # remove any old stuff - - # ... and add any new stuff - my($b,$p) = split_path($revision,$action->[1]); - open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $p; - local $/ = '\0'; - while(<$F>) { - chomp; - my($m,$p) = split(/\t/,$_,2); - my($mode,$type,$sha1) = split(/ /,$m); - next if $type ne "blob"; - push(@new,[$mode,$sha1,$p]); + my $node_kind = node_kind($branch,$path,$revision); + if ($node_kind eq $SVN::Node::file) { + my $f = get_file($revision,$branch,$path); + push(@new,$f) if $f; } } else { die "$revision: unknown action '".$action->[0]."' for $path\n"; @@ -445,7 +547,7 @@ sub commit { if(@old) { open my $F, "-|", "git-ls-files", "-z", @old or die $!; @old = (); - local $/ = '\0'; + local $/ = "\0"; while(<$F>) { chomp; push(@old,$_); @@ -504,7 +606,6 @@ sub commit { $pw->close(); my @par = (); - @par = ("-p",$rev) if defined $rev; # loose detection of merges # based on the commit msg @@ -514,10 +615,16 @@ sub commit { if ($mparent eq 'HEAD') { $mparent = $opt_o }; if ( -e "$git_dir/refs/heads/$mparent") { $mparent = get_headref($mparent, $git_dir); - push @par, '-p', $mparent; + push (@parents, $mparent); print OUT "Merge parent branch: $mparent\n" if $opt_v; } - } + } + } + my %seen_parents = (); + my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents; + foreach my $bparent (@unique_parents) { + push @par, '-p', $bparent; + print OUT "Merge parent branch: $bparent\n" if $opt_v; } exec("env", @@ -550,11 +657,15 @@ sub commit { die "Error running git-commit-tree: $?\n" if $?; } + if (not defined $cid) { + $cid = $branches{"/"}{"LAST"}; + } + if(not defined $dest) { print "... no known parent\n" if $opt_v; } elsif(not $tag) { print "Writing to refs/heads/$dest\n" if $opt_v; - open(C,">$git_dir/refs/heads/$dest") and + open(C,">$git_dir/refs/heads/$dest") and print C ("$cid\n") and close(C) or die "Cannot write branch $dest for update: $!\n"; @@ -564,8 +675,9 @@ sub commit { my($in, $out) = ('',''); $last_rev = "-" if %$changed_paths; # the tag was 'complex', i.e. did not refer to a "real" revision - + $dest =~ tr/_/\./ if $opt_u; + $branch = $dest; my $pid = open2($in, $out, 'git-mktag'); print $out ("object $cid\n". @@ -597,17 +709,17 @@ sub commit { print "DONE: $revision $dest $cid\n" if $opt_v; } -my ($changed_paths, $revision, $author, $date, $message, $pool) = @_; -sub _commit_all { - ($changed_paths, $revision, $author, $date, $message, $pool) = @_; +sub commit_all { + # Recursive use of the SVN connection does not work + local $svn = $svn2; + + my ($changed_paths, $revision, $author, $date, $message, $pool) = @_; my %p; while(my($path,$action) = each %$changed_paths) { - $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev ]; + $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev, $path ]; } $changed_paths = \%p; -} -sub commit_all { my %done; my @col; my $pref; @@ -623,15 +735,20 @@ sub commit_all { } } -while(++$current_rev <= $svn->{'maxrev'}) { - $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,""); - commit_all(); - if($opt_l and not --$opt_l) { - print STDERR "Exiting due to a memory leak. Repeat, please.\n"; - last; - } +$opt_l = $svn->{'maxrev'} if not defined $opt_l or $opt_l > $svn->{'maxrev'}; + +if ($svn->{'maxrev'} < $current_rev) { + print "Up to date: no new revisions to fetch!\n" if $opt_v; + unlink("$git_dir/SVN2GIT_HEAD"); + exit; } +print "Fetching from $current_rev to $opt_l ...\n" if $opt_v; + +my $pool=SVN::Pool->new; +$svn->{'svn'}->get_log("/",$current_rev,$opt_l,0,1,1,\&commit_all,$pool); +$pool->clear; + unlink($git_index); @@ -655,8 +772,7 @@ if($orig_branch) { print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0); system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") unless -f "$git_dir/refs/heads/master"; - unlink("$git_dir/HEAD"); - symlink("refs/heads/$orig_branch","$git_dir/HEAD"); + system('git-update-ref', 'HEAD', "$orig_branch"); unless ($opt_i) { system('git checkout'); die "checkout failed: $?\n" if $?;