X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=git-svnimport.perl;h=38ac732ca9b677a5647a96db44f7133b47db0648;hb=HEAD;hp=896222ba741e983b3f7b76feaeae268b250ace92;hpb=7ee74a99b2b710b5f5adc22db2fe0aca8a74c809;p=git.git diff --git a/git-svnimport.perl b/git-svnimport.perl index 896222ba..38ac732c 100755 --- a/git-svnimport.perl +++ b/git-svnimport.perl @@ -5,16 +5,15 @@ # # The basic idea is to pull and analyze SVN changes. # -# Checking out the files is done by a single long-running CVS connection -# / server process. +# Checking out the files is done by a single long-running SVN connection. # # The head revision is on branch "origin" by default. # You can change that with the '-o' option. -require v5.8.0; # for shell-safe open("-|",LIST) use strict; use warnings; use Getopt::Std; +use File::Copy; use File::Spec; use File::Temp qw(tempfile); use File::Path qw(mkpath); @@ -26,55 +25,74 @@ use IPC::Open2; use SVN::Core; use SVN::Ra; -die "Need CVN:COre 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1"; +die "Need SVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1"; $SIG{'PIPE'}="IGNORE"; $ENV{'TZ'}="UTC"; -our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s); +our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T, + $opt_b,$opt_r,$opt_I,$opt_A,$opt_s,$opt_l,$opt_d,$opt_D); sub usage() { print STDERR <; - chomp $cvs_tree; - close $f; -} else { - usage(); -} +my $svn_url = $ARGV[0]; +my $svn_dir = $ARGV[1]; our @mergerx = (); if ($opt_m) { - @mergerx = ( qr/\W(?:from|of|merge|merging|merged) (\w+)/i ); + my $branch_esc = quotemeta ($branch_name); + my $trunk_esc = quotemeta ($trunk_name); + @mergerx = + ( + qr!\b(?:merg(?:ed?|ing))\b.*?\b((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i, + qr!\b(?:from|of)\W+((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i, + qr!\b(?:from|of)\W+(?:the )?([\w\.\-]+)[-\s]branch\b!i + ); } if ($opt_M) { - push (@mergerx, qr/$opt_M/); + unshift (@mergerx, qr/$opt_M/); +} + +# Absolutize filename now, since we will have chdir'ed by the time we +# get around to opening it. +$opt_A = File::Spec->rel2abs($opt_A) if $opt_A; + +our %users = (); +our $users_file = undef; +sub read_users($) { + $users_file = File::Spec->rel2abs(@_); + die "Cannot open $users_file\n" unless -f $users_file; + open(my $authors,$users_file); + while(<$authors>) { + chomp; + next unless /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/; + (my $user,my $name,my $email) = ($1,$2,$3); + $users{$user} = [$name,$email]; + } + close($authors); } select(STDERR); $|=1; select(STDOUT); @@ -87,6 +105,7 @@ package SVNconn; use File::Spec; use File::Temp qw(tempfile); use POSIX qw(strftime dup2); +use Fcntl qw(SEEK_SET); sub new { my($what,$repo) = @_; @@ -106,8 +125,10 @@ sub new { sub conn { my $self = shift; my $repo = $self->{'fullrep'}; - my $s = SVN::Ra->new($repo); - + my $auth = SVN::Core::auth_open ([SVN::Client::get_simple_provider, + SVN::Client::get_ssl_server_trust_file_provider, + SVN::Client::get_username_provider]); + my $s = SVN::Ra->new(url => $repo, auth => $auth); die "SVN connection to $repo: $!\n" unless defined $s; $self->{'svn'} = $s; $self->{'repo'} = $repo; @@ -116,30 +137,86 @@ sub conn { sub file { my($self,$path,$rev) = @_; - my $res; - my ($fh, $name) = tempfile('gitsvn.XXXXXX', + my ($fh, $name) = tempfile('gitsvn.XXXXXX', DIR => File::Spec->tmpdir(), UNLINK => 1); print "... $rev $path ...\n" if $opt_v; - eval { $self->{'svn'}->get_file($path,$rev,$fh); }; - if ($@ and $@ !~ /Attempted to get checksum/) { - # retry - $self->conn(); - eval { $self->{'svn'}->get_file($path,$rev,$fh); }; - }; - return () if $@ and $@ !~ /Attempted to get checksum/; - die $@ if $@; + my (undef, $properties); + my $pool = SVN::Pool->new(); + eval { (undef, $properties) + = $self->{'svn'}->get_file($path,$rev,$fh,$pool); }; + $pool->clear; + if($@) { + return undef if $@ =~ /Attempted to get checksum/; + die $@; + } + my $mode; + if (exists $properties->{'svn:executable'}) { + $mode = '100755'; + } elsif (exists $properties->{'svn:special'}) { + my ($special_content, $filesize); + $filesize = tell $fh; + seek $fh, 0, SEEK_SET; + read $fh, $special_content, $filesize; + if ($special_content =~ s/^link //) { + $mode = '120000'; + seek $fh, 0, SEEK_SET; + truncate $fh, 0; + print $fh $special_content; + } else { + die "unexpected svn:special file encountered"; + } + } else { + $mode = '100644'; + } close ($fh); - return ($name, $res); + return ($name, $mode); } +sub ignore { + my($self,$path,$rev) = @_; -package main; - -my $svn = SVNconn->new($cvs_tree); + print "... $rev $path ...\n" if $opt_v; + my (undef,undef,$properties) + = $self->{'svn'}->get_dir($path,$rev,undef); + if (exists $properties->{'svn:ignore'}) { + my ($fh, $name) = tempfile('gitsvn.XXXXXX', + DIR => File::Spec->tmpdir(), + UNLINK => 1); + print $fh $properties->{'svn:ignore'}; + close($fh); + return $name; + } else { + return undef; + } +} +package main; +use URI; + +our $svn = $svn_url; +$svn .= "/$svn_dir" if defined $svn_dir; +my $svn2 = SVNconn->new($svn); +$svn = SVNconn->new($svn); + +my $lwp_ua; +if($opt_d or $opt_D) { + $svn_url = URI->new($svn_url)->canonical; + if($opt_D) { + $svn_dir =~ s#/*$#/#; + } else { + $svn_dir = ""; + } + if ($svn_url->scheme eq "http") { + use LWP::UserAgent; + $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []); + } else { + print STDERR "Warning: not HTTP; turning off direct file access\n"; + $opt_d=0; + } +} sub pdate($) { my($d) = @_; @@ -158,9 +235,9 @@ sub getwd() { sub get_headref($$) { my $name = shift; - my $git_dir = shift; + my $git_dir = shift; my $sha; - + if (open(C,"$git_dir/refs/heads/$name")) { chomp($sha = ); close(C); @@ -192,7 +269,7 @@ $ENV{GIT_INDEX_FILE} = $git_index; my $maxnum = 0; my $last_rev = ""; my $last_branch; -my $current_rev = $opt_s ? ($opt_s-1) : 0; +my $current_rev = $opt_s || 1; unless(-d $git_dir) { system("git-init-db"); die "Cannot init the GIT db at $git_tree: $?\n" if $?; @@ -210,7 +287,11 @@ unless(-d $git_dir) { -f "$git_dir/svn2git" or die "'$git_dir/svn2git' does not exist.\n". "You need that file for incremental imports.\n"; - $last_branch = basename(readlink("$git_dir/HEAD")); + open(F, "git-symbolic-ref HEAD |") or + die "Cannot run git-symbolic-ref: $!\n"; + chomp ($last_branch = ); + $last_branch = basename($last_branch); + close(F); unless($last_branch) { warn "Cannot read the last branch name: $! -- assuming 'master'\n"; $last_branch = "master"; @@ -230,7 +311,7 @@ EOM $forward_master = $opt_o ne 'master' && -f "$git_dir/refs/heads/master" && - system('cmp', '-s', "$git_dir/refs/heads/master", + system('cmp', '-s', "$git_dir/refs/heads/master", "$git_dir/refs/heads/$opt_o") == 0; # populate index @@ -244,61 +325,121 @@ EOM my($num,$branch,$ref) = split; $branches{$branch}{$num} = $ref; $branches{$branch}{"LAST"} = $ref; - $current_rev = $num+1 if $current_rev < $num+1; + $current_rev = $num+1 if $current_rev <= $num; } close($B); } -d $git_dir or die "Could not create git subdir ($git_dir).\n"; -open BRANCHES,">>", "$git_dir/svn2git"; - +my $default_authors = "$git_dir/svn-authors"; +if ($opt_A) { + read_users($opt_A); + copy($opt_A,$default_authors) or die "Copy failed: $!"; +} else { + read_users($default_authors) if -f $default_authors; +} -## cvsps output: -#--------------------- -#PatchSet 314 -#Date: 1999/09/18 13:03:59 -#Author: wkoch -#Branch: STABLE-BRANCH-1-0 -#Ancestor branch: HEAD -#Tag: (none) -#Log: -# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch -#Members: -# README:1.57->1.57.2.1 -# VERSION:1.96->1.96.2.1 -# -#--------------------- +open BRANCHES,">>", "$git_dir/svn2git"; -my $state = 0; +sub node_kind($$$) { + my ($branch, $path, $revision) = @_; + my $pool=SVN::Pool->new; + my $kind = $svn->{'svn'}->check_path(revert_split_path($branch,$path),$revision,$pool); + $pool->clear; + return $kind; +} -sub get_file($$$) { - my($rev,$branch,$path) = @_; +sub revert_split_path($$) { + my($branch,$path) = @_; - # revert split_path(), below my $svnpath; $path = "" if $path eq "/"; # this should not happen, but ... if($branch eq "/") { - $svnpath = "/$trunk_name/$path"; + $svnpath = "$trunk_name/$path"; } elsif($branch =~ m#^/#) { - $svnpath = "/$tag_name$branch/$path"; + $svnpath = "$tag_name$branch/$path"; } else { - $svnpath = "/$branch_name/$branch/$path"; + $svnpath = "$branch_name/$branch/$path"; } + $svnpath =~ s#/+$##; + return $svnpath; +} + +sub get_file($$$) { + my($rev,$branch,$path) = @_; + + my $svnpath = revert_split_path($branch,$path); + # now get it - my ($name, $res) = eval { $svn->file($svnpath,$rev); }; - return () unless defined $name; + my ($name,$mode); + if($opt_d) { + my($req,$res); + + # /svn/!svn/bc/2/django/trunk/django-docs/build.py + my $url=$svn_url->clone(); + $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath"); + print "... $path...\n" if $opt_v; + $req = HTTP::Request->new(GET => $url); + $res = $lwp_ua->request($req); + if ($res->is_success) { + my $fh; + ($fh, $name) = tempfile('gitsvn.XXXXXX', + DIR => File::Spec->tmpdir(), UNLINK => 1); + print $fh $res->content; + close($fh) or die "Could not write $name: $!\n"; + } else { + return undef if $res->code == 301; # directory? + die $res->status_line." at $url\n"; + } + $mode = '0644'; # can't obtain mode via direct http request? + } else { + ($name,$mode) = $svn->file("$svnpath",$rev); + return undef unless defined $name; + } - open my $F, '-|', "git-hash-object", "-w", $name + my $pid = open(my $F, '-|'); + die $! unless defined $pid; + if (!$pid) { + exec("git-hash-object", "-w", $name) or die "Cannot create object: $!\n"; + } my $sha = <$F>; chomp $sha; close $F; - my $mode = "0644"; # SV does not seem to store any file modes + unlink $name; return [$mode, $sha, $path]; } +sub get_ignore($$$$$) { + my($new,$old,$rev,$branch,$path) = @_; + + return unless $opt_I; + my $svnpath = revert_split_path($branch,$path); + my $name = $svn->ignore("$svnpath",$rev); + if ($path eq '/') { + $path = $opt_I; + } else { + $path = File::Spec->catfile($path,$opt_I); + } + if (defined $name) { + my $pid = open(my $F, '-|'); + die $! unless defined $pid; + if (!$pid) { + exec("git-hash-object", "-w", $name) + or die "Cannot create object: $!\n"; + } + my $sha = <$F>; + chomp $sha; + close $F; + unlink $name; + push(@$new,['0644',$sha,$path]); + } else { + push(@$old,$path); + } +} + sub split_path($$) { my($rev,$path) = @_; my $branch; @@ -310,20 +451,95 @@ sub split_path($$) { } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) { $branch = $1; } else { - print STDERR "$rev: Unrecognized path: $path\n"; + my %no_error = ( + "/" => 1, + "/$tag_name" => 1, + "/$branch_name" => 1 + ); + print STDERR "$rev: Unrecognized path: $path\n" unless (defined $no_error{$path}); return () } $path = "/" if $path eq ""; return ($branch,$path); } +sub branch_rev($$) { + + my ($srcbranch,$uptorev) = @_; + + my $bbranches = $branches{$srcbranch}; + my @revs = reverse sort { ($a eq 'LAST' ? 0 : $a) <=> ($b eq 'LAST' ? 0 : $b) } keys %$bbranches; + my $therev; + foreach my $arev(@revs) { + next if ($arev eq 'LAST'); + if ($arev <= $uptorev) { + $therev = $arev; + last; + } + } + return $therev; +} + +sub copy_path($$$$$$$$) { + # Somebody copied a whole subdirectory. + # We need to find the index entries from the old version which the + # SVN log entry points to, and add them to the new place. + + my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_; + + my($srcbranch,$srcpath) = split_path($rev,$oldpath); + unless(defined $srcbranch) { + print "Path not found when copying from $oldpath @ $rev\n"; + return; + } + my $therev = branch_rev($srcbranch, $rev); + my $gitrev = $branches{$srcbranch}{$therev}; + unless($gitrev) { + print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n"; + return; + } + if ($srcbranch ne $newbranch) { + push(@$parents, $branches{$srcbranch}{'LAST'}); + } + print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v; + if ($node_kind eq $SVN::Node::dir) { + $srcpath =~ s#/*$#/#; + } + + my $pid = open my $f,'-|'; + die $! unless defined $pid; + if (!$pid) { + exec("git-ls-tree","-r","-z",$gitrev,$srcpath) + or die $!; + } + local $/ = "\0"; + while(<$f>) { + chomp; + my($m,$p) = split(/\t/,$_,2); + my($mode,$type,$sha1) = split(/ /,$m); + next if $type ne "blob"; + if ($node_kind eq $SVN::Node::dir) { + $p = $path . substr($p,length($srcpath)-1); + } else { + $p = $path; + } + push(@$new,[$mode,$sha1,$p]); + } + close($f) or + print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n"; +} + sub commit { my($branch, $changed_paths, $revision, $author, $date, $message) = @_; my($author_name,$author_email,$dest); - my(@old,@new); + my(@old,@new,@parents); if (not defined $author) { $author_name = $author_email = "unknown"; + } elsif (defined $users_file) { + die "User $author is not listed in $users_file\n" + unless exists $users{$author}; + ($author_name,$author_email) = @{$users{$author}}; } elsif ($author =~ /^(.*?)\s+<(.*)>$/) { ($author_name, $author_email) = ($1, $2); } else { @@ -407,56 +623,85 @@ sub commit { $last_rev = $rev; } + push (@parents, $rev) if defined $rev; + my $cid; if($tag and not %$changed_paths) { $cid = $rev; } else { - while(my($path,$action) = each %$changed_paths) { - if ($action->[0] eq "A") { - my $f = get_file($revision,$branch,$path); - push(@new,$f) if $f; + my @paths = sort keys %$changed_paths; + foreach my $path(@paths) { + my $action = $changed_paths->{$path}; + + if ($action->[0] eq "R") { + # refer to a file/tree in an earlier commit + push(@old,$path); # remove any old stuff + } + if(($action->[0] eq "A") || ($action->[0] eq "R")) { + my $node_kind = node_kind($branch,$path,$revision); + if ($node_kind eq $SVN::Node::file) { + my $f = get_file($revision,$branch,$path); + if ($f) { + push(@new,$f) if $f; + } else { + my $opath = $action->[3]; + print STDERR "$revision: $branch: could not fetch '$opath'\n"; + } + } elsif ($node_kind eq $SVN::Node::dir) { + if($action->[1]) { + copy_path($revision, $branch, + $path, $action->[1], + $action->[2], $node_kind, + \@new, \@parents); + } else { + get_ignore(\@new, \@old, $revision, + $branch, $path); + } + } } elsif ($action->[0] eq "D") { push(@old,$path); } elsif ($action->[0] eq "M") { - my $f = get_file($revision,$branch,$path); - push(@new,$f) if $f; - } elsif ($action->[0] eq "R") { - # refer to a file/tree in an earlier commit - push(@old,$path); # remove any old stuff - - # ... and add any new stuff - my($b,$p) = split_path($revision,$action->[1]); - open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $p; - local $/ = '\0'; - while(<$F>) { - chomp; - my($m,$p) = split(/\t/,$_,2); - my($mode,$type,$sha1) = split(/ /,$m); - next if $type ne "blob"; - push(@new,[$mode,$sha1,$p]); + my $node_kind = node_kind($branch,$path,$revision); + if ($node_kind eq $SVN::Node::file) { + my $f = get_file($revision,$branch,$path); + push(@new,$f) if $f; + } elsif ($node_kind eq $SVN::Node::dir) { + get_ignore(\@new, \@old, $revision, + $branch,$path); } } else { die "$revision: unknown action '".$action->[0]."' for $path\n"; } } - if(@old) { - open my $F, "-|", "git-ls-files", "-z", @old or die $!; - @old = (); - local $/ = '\0'; + while(@old) { + my @o1; + if(@old > 55) { + @o1 = splice(@old,0,50); + } else { + @o1 = @old; + @old = (); + } + my $pid = open my $F, "-|"; + die "$!" unless defined $pid; + if (!$pid) { + exec("git-ls-files", "-z", @o1) or die $!; + } + @o1 = (); + local $/ = "\0"; while(<$F>) { chomp; - push(@old,$_); + push(@o1,$_); } close($F); - while(@old) { + while(@o1) { my @o2; - if(@old > 55) { - @o2 = splice(@old,0,50); + if(@o1 > 55) { + @o2 = splice(@o1,0,50); } else { - @o2 = @old; - @old = (); + @o2 = @o1; + @o1 = (); } system("git-update-index","--force-remove","--",@o2); die "Cannot remove files: $?\n" if $?; @@ -502,7 +747,6 @@ sub commit { $pw->close(); my @par = (); - @par = ("-p",$rev) if defined $rev; # loose detection of merges # based on the commit msg @@ -512,10 +756,16 @@ sub commit { if ($mparent eq 'HEAD') { $mparent = $opt_o }; if ( -e "$git_dir/refs/heads/$mparent") { $mparent = get_headref($mparent, $git_dir); - push @par, '-p', $mparent; + push (@parents, $mparent); print OUT "Merge parent branch: $mparent\n" if $opt_v; } - } + } + } + my %seen_parents = (); + my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents; + foreach my $bparent (@unique_parents) { + push @par, '-p', $bparent; + print OUT "Merge parent branch: $bparent\n" if $opt_v; } exec("env", @@ -532,6 +782,7 @@ sub commit { $pr->reader(); $message =~ s/[\s\n]+\z//; + $message = "r$revision: $message" if $opt_r; print $pw "$message\n" or die "Error writing to git-commit-tree: $!\n"; @@ -548,27 +799,27 @@ sub commit { die "Error running git-commit-tree: $?\n" if $?; } + if (not defined $cid) { + $cid = $branches{"/"}{"LAST"}; + } + if(not defined $dest) { print "... no known parent\n" if $opt_v; } elsif(not $tag) { print "Writing to refs/heads/$dest\n" if $opt_v; - open(C,">$git_dir/refs/heads/$dest") and + open(C,">$git_dir/refs/heads/$dest") and print C ("$cid\n") and close(C) or die "Cannot write branch $dest for update: $!\n"; } - $branches{$branch}{"LAST"} = $cid; - $branches{$branch}{$revision} = $cid; - $last_rev = $cid; - print BRANCHES "$revision $branch $cid\n"; - print "DONE: $revision $dest $cid\n" if $opt_v; if($tag) { my($in, $out) = ('',''); $last_rev = "-" if %$changed_paths; # the tag was 'complex', i.e. did not refer to a "real" revision - + $dest =~ tr/_/\./ if $opt_u; + $branch = $dest; my $pid = open2($in, $out, 'git-mktag'); print $out ("object $cid\n". @@ -593,19 +844,24 @@ sub commit { print "Created tag '$dest' on '$branch'\n" if $opt_v; } + $branches{$branch}{"LAST"} = $cid; + $branches{$branch}{$revision} = $cid; + $last_rev = $cid; + print BRANCHES "$revision $branch $cid\n"; + print "DONE: $revision $dest $cid\n" if $opt_v; } -my ($changed_paths, $revision, $author, $date, $message, $pool) = @_; -sub _commit_all { - ($changed_paths, $revision, $author, $date, $message, $pool) = @_; +sub commit_all { + # Recursive use of the SVN connection does not work + local $svn = $svn2; + + my ($changed_paths, $revision, $author, $date, $message, $pool) = @_; my %p; while(my($path,$action) = each %$changed_paths) { - $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev ]; + $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev, $path ]; } $changed_paths = \%p; -} -sub commit_all { my %done; my @col; my $pref; @@ -621,11 +877,20 @@ sub commit_all { } } -while(++$current_rev < $svn->{'maxrev'}) { - $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,""); - commit_all(); +$opt_l = $svn->{'maxrev'} if not defined $opt_l or $opt_l > $svn->{'maxrev'}; + +if ($opt_l < $current_rev) { + print "Up to date: no new revisions to fetch!\n" if $opt_v; + unlink("$git_dir/SVN2GIT_HEAD"); + exit; } +print "Fetching from $current_rev to $opt_l ...\n" if $opt_v; + +my $pool=SVN::Pool->new; +$svn->{'svn'}->get_log("/",$current_rev,$opt_l,0,1,1,\&commit_all,$pool); +$pool->clear; + unlink($git_index); @@ -637,7 +902,7 @@ if (defined $orig_git_index) { # Now switch back to the branch we were in before all of this happened if($orig_branch) { - print "DONE\n" if $opt_v; + print "DONE\n" if $opt_v and (not defined $opt_l or $opt_l > 0); system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") if $forward_master; unless ($opt_i) { @@ -646,11 +911,10 @@ if($orig_branch) { } } else { $orig_branch = "master"; - print "DONE; creating $orig_branch branch\n" if $opt_v; + print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0); system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") unless -f "$git_dir/refs/heads/master"; - unlink("$git_dir/HEAD"); - symlink("refs/heads/$orig_branch","$git_dir/HEAD"); + system('git-update-ref', 'HEAD', "$orig_branch"); unless ($opt_i) { system('git checkout'); die "checkout failed: $?\n" if $?;