X-Git-Url: https://git.octo.it/?a=blobdiff_plain;f=git-svnimport.perl;h=20a85724cb1660a3411fa34717cd5f1e98ebd731;hb=303958dc42d451aead0e1b9cf7b9836831a05f4b;hp=ba9f818a06341a414eb8d21fa8ad29d225895f20;hpb=8cd4177d5ef651c8acb9c1f113a076e6fae2e1c2;p=git.git diff --git a/git-svnimport.perl b/git-svnimport.perl index ba9f818a..20a85724 100755 --- a/git-svnimport.perl +++ b/git-svnimport.perl @@ -5,8 +5,7 @@ # # The basic idea is to pull and analyze SVN changes. # -# Checking out the files is done by a single long-running CVS connection -# / server process. +# Checking out the files is done by a single long-running SVN connection. # # The head revision is on branch "origin" by default. # You can change that with the '-o' option. @@ -26,48 +25,40 @@ use IPC::Open2; use SVN::Core; use SVN::Ra; -die "Need CVN:COre 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1"; +die "Need CVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1"; $SIG{'PIPE'}="IGNORE"; $ENV{'TZ'}="UTC"; -our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b); +our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_s,$opt_l,$opt_d,$opt_D); sub usage() { print STDERR <; - chomp $cvs_tree; - close $f; -} else { - usage(); -} +my $svn_url = $ARGV[0]; +my $svn_dir = $ARGV[1]; our @mergerx = (); if ($opt_m) { @@ -110,39 +101,50 @@ sub conn { die "SVN connection to $repo: $!\n" unless defined $s; $self->{'svn'} = $s; - print STDERR "*** SVN *** $s ***\n"; $self->{'repo'} = $repo; $self->{'maxrev'} = $s->get_latest_revnum(); } sub file { my($self,$path,$rev) = @_; - my $res; - my ($fh, $name) = tempfile('gitsvn.XXXXXX', + my ($fh, $name) = tempfile('gitsvn.XXXXXX', DIR => File::Spec->tmpdir(), UNLINK => 1); print "... $rev $path ...\n" if $opt_v; - my $s = $self->{'svn'}; - print STDERR "*** GET *** $s ***\n"; - eval { $s->get_file($path,$rev,$fh); }; - if ($@ and $@ !~ /Attempted to get checksum/) { - # retry - $self->conn(); - eval { $self->{'svn'}->get_file($path,$rev,$fh); }; - }; - return () if $@ and $@ !~ /Attempted to get checksum/; - die $@ if $@; + eval { $self->{'svn'}->get_file($path,$rev,$fh); }; + if($@) { + return undef if $@ =~ /Attempted to get checksum/; + die $@; + } close ($fh); - return ($name, $res); + return $name; } - package main; +use URI; -my $svn = SVNconn->new($cvs_tree); +my $svn = $svn_url; +$svn .= "/$svn_dir" if defined $svn_dir; +$svn = SVNconn->new($svn); +my $lwp_ua; +if($opt_d or $opt_D) { + $svn_url = URI->new($svn_url)->canonical; + if($opt_D) { + $svn_dir =~ s#/*$#/#; + } else { + $svn_dir = ""; + } + if ($svn_url->scheme eq "http") { + use LWP::UserAgent; + $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []); + } else { + print STDERR "Warning: not HTTP; turning off direct file access\n"; + $opt_d=0; + } +} sub pdate($) { my($d) = @_; @@ -161,9 +163,9 @@ sub getwd() { sub get_headref($$) { my $name = shift; - my $git_dir = shift; + my $git_dir = shift; my $sha; - + if (open(C,"$git_dir/refs/heads/$name")) { chomp($sha = ); close(C); @@ -195,7 +197,7 @@ $ENV{GIT_INDEX_FILE} = $git_index; my $maxnum = 0; my $last_rev = ""; my $last_branch; -my $current_rev = 0; +my $current_rev = $opt_s-1; unless(-d $git_dir) { system("git-init-db"); die "Cannot init the GIT db at $git_tree: $?\n" if $?; @@ -233,7 +235,7 @@ EOM $forward_master = $opt_o ne 'master' && -f "$git_dir/refs/heads/master" && - system('cmp', '-s', "$git_dir/refs/heads/master", + system('cmp', '-s', "$git_dir/refs/heads/master", "$git_dir/refs/heads/$opt_o") == 0; # populate index @@ -247,7 +249,7 @@ EOM my($num,$branch,$ref) = split; $branches{$branch}{$num} = $ref; $branches{$branch}{"LAST"} = $ref; - $current_rev = $num+1 if $current_rev < $num+1; + $current_rev = $num if $current_rev < $num; } close($B); } @@ -256,25 +258,6 @@ EOM open BRANCHES,">>", "$git_dir/svn2git"; - -## cvsps output: -#--------------------- -#PatchSet 314 -#Date: 1999/09/18 13:03:59 -#Author: wkoch -#Branch: STABLE-BRANCH-1-0 -#Ancestor branch: HEAD -#Tag: (none) -#Log: -# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch -#Members: -# README:1.57->1.57.2.1 -# VERSION:1.96->1.96.2.1 -# -#--------------------- - -my $state = 0; - sub get_file($$$) { my($rev,$branch,$path) = @_; @@ -282,22 +265,45 @@ sub get_file($$$) { my $svnpath; $path = "" if $path eq "/"; # this should not happen, but ... if($branch eq "/") { - $svnpath = "/$trunk_name/$path"; + $svnpath = "$trunk_name/$path"; } elsif($branch =~ m#^/#) { - $svnpath = "/$tag_name$branch/$path"; + $svnpath = "$tag_name$branch/$path"; } else { - $svnpath = "/$branch_name/$branch/$path"; + $svnpath = "$branch_name/$branch/$path"; } # now get it - my ($name, $res) = eval { $svn->file($svnpath,$rev); }; - return () unless defined $name; + my $name; + if($opt_d) { + my($req,$res); + + # /svn/!svn/bc/2/django/trunk/django-docs/build.py + my $url=$svn_url->clone(); + $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath"); + print "... $path...\n" if $opt_v; + $req = HTTP::Request->new(GET => $url); + $res = $lwp_ua->request($req); + if ($res->is_success) { + my $fh; + ($fh, $name) = tempfile('gitsvn.XXXXXX', + DIR => File::Spec->tmpdir(), UNLINK => 1); + print $fh $res->content; + close($fh) or die "Could not write $name: $!\n"; + } else { + return undef if $res->code == 301; # directory? + die $res->status_line." at $url\n"; + } + } else { + $name = $svn->file("/$svnpath",$rev); + return undef unless defined $name; + } open my $F, '-|', "git-hash-object", "-w", $name or die "Cannot create object: $!\n"; my $sha = <$F>; chomp $sha; close $F; + unlink $name; my $mode = "0644"; # SV does not seem to store any file modes return [$mode, $sha, $path]; } @@ -320,6 +326,36 @@ sub split_path($$) { return ($branch,$path); } +sub copy_subdir($$$$$$) { + # Somebody copied a whole subdirectory. + # We need to find the index entries from the old version which the + # SVN log entry points to, and add them to the new place. + + my($newrev,$newbranch,$path,$oldpath,$rev,$new) = @_; + my($branch,$srcpath) = split_path($rev,$oldpath); + + my $gitrev = $branches{$branch}{$rev}; + unless($gitrev) { + print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n"; + return; + } + print "$newrev:$newbranch:$path: copying from $branch:$srcpath @ $rev\n" if $opt_v; + $srcpath =~ s#/*$#/#; + open my $f,"-|","git-ls-tree","-r","-z",$gitrev,$srcpath; + local $/ = "\0"; + while(<$f>) { + chomp; + my($m,$p) = split(/\t/,$_,2); + my($mode,$type,$sha1) = split(/ /,$m); + next if $type ne "blob"; + $p = substr($p,length($srcpath)-1); + print "... found $path$p ...\n" if $opt_v; + push(@$new,[$mode,$sha1,$path.$p]); + } + close($f) or + print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n"; +} + sub commit { my($branch, $changed_paths, $revision, $author, $date, $message) = @_; my($author_name,$author_email,$dest); @@ -367,7 +403,7 @@ sub commit { } my $rev; - if(defined $parent) { + if($revision > $opt_s and defined $parent) { open(H,"git-rev-parse --verify $parent |"); $rev = ; close(H) or do { @@ -380,7 +416,7 @@ sub commit { return; } $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"}; - if($revision != 1 and not $rev) { + if($revision != $opt_s and not $rev) { print STDERR "$revision: do not know ancestor for '$parent'!\n"; return; } @@ -410,190 +446,205 @@ sub commit { $last_rev = $rev; } - while(my($path,$action) = each %$changed_paths) { - if ($action->[0] eq "A") { - my $f = get_file($revision,$branch,$path); - push(@new,$f) if $f; - } elsif ($action->[0] eq "D") { - push(@old,$path); - } elsif ($action->[0] eq "M") { - my $f = get_file($revision,$branch,$path); - push(@new,$f) if $f; - } elsif ($action->[0] eq "R") { - # refer to a file/tree in an earlier commit - push(@old,$path); # remove any old stuff - - # ... and add any new stuff - my($b,$p) = split_path($revision,$action->[1]); - open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $p; - local $/ = '\0'; - while(<$F>) { - chomp; - my($m,$p) = split(/\t/,$_,2); - my($mode,$type,$sha1) = split(/ /,$m); - next if $type ne "blob"; - push(@new,[$mode,$sha1,$p]); + my $cid; + if($tag and not %$changed_paths) { + $cid = $rev; + } else { + my @paths = sort keys %$changed_paths; + foreach my $path(@paths) { + my $action = $changed_paths->{$path}; + + if ($action->[0] eq "A") { + my $f = get_file($revision,$branch,$path); + if($f) { + push(@new,$f) if $f; + } elsif($action->[1]) { + copy_subdir($revision,$branch,$path,$action->[1],$action->[2],\@new); + } else { + my $opath = $action->[3]; + print STDERR "$revision: $branch: could not fetch '$opath'\n"; + } + } elsif ($action->[0] eq "D") { + push(@old,$path); + } elsif ($action->[0] eq "M") { + my $f = get_file($revision,$branch,$path); + push(@new,$f) if $f; + } elsif ($action->[0] eq "R") { + # refer to a file/tree in an earlier commit + push(@old,$path); # remove any old stuff + + # ... and add any new stuff + my($b,$srcpath) = split_path($revision,$action->[1]); + $srcpath =~ s#/*$#/#; + open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $srcpath; + local $/ = "\0"; + while(<$F>) { + chomp; + my($m,$p) = split(/\t/,$_,2); + my($mode,$type,$sha1) = split(/ /,$m); + next if $type ne "blob"; + $p = substr($p,length($srcpath)-1); + push(@new,[$mode,$sha1,$path.$p]); + } + close($F); + } else { + die "$revision: unknown action '".$action->[0]."' for $path\n"; } - } else { - die "$revision: unknown action '".$action->[0]."' for $path\n"; } - } - if(@old) { - open my $F, "-│", "git-ls-files", "-z", @old or die $!; - @old = (); - local $/ = '\0'; - while(<$F>) { - chomp; - push(@old,$_); + if(@old) { + open my $F, "-|", "git-ls-files", "-z", @old or die $!; + @old = (); + local $/ = "\0"; + while(<$F>) { + chomp; + push(@old,$_); + } + close($F); + + while(@old) { + my @o2; + if(@old > 55) { + @o2 = splice(@old,0,50); + } else { + @o2 = @old; + @old = (); + } + system("git-update-index","--force-remove","--",@o2); + die "Cannot remove files: $?\n" if $?; + } } - close($F); - - while(@old) { - my @o2; - if(@old > 55) { - @o2 = splice(@old,0,50); + while(@new) { + my @n2; + if(@new > 12) { + @n2 = splice(@new,0,10); } else { - @o2 = @old; - @old = (); + @n2 = @new; + @new = (); } - system("git-update-index","--force-remove","--",@o2); - die "Cannot remove files: $?\n" if $?; + system("git-update-index","--add", + (map { ('--cacheinfo', @$_) } @n2)); + die "Cannot add files: $?\n" if $?; } - } - while(@new) { - my @n2; - if(@new > 12) { - @n2 = splice(@new,0,10); - } else { - @n2 = @new; - @new = (); - } - system("git-update-index","--add", - (map { ('--cacheinfo', @$_) } @n2)); - die "Cannot add files: $?\n" if $?; - } - - my $pid = open(C,"-|"); - die "Cannot fork: $!" unless defined $pid; - unless($pid) { - exec("git-write-tree"); - die "Cannot exec git-write-tree: $!\n"; - } - chomp(my $tree = ); - length($tree) == 40 - or die "Cannot get tree id ($tree): $!\n"; - close(C) - or die "Error running git-write-tree: $?\n"; - print "Tree ID $tree\n" if $opt_v; - - my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n"; - my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n"; - $pid = fork(); - die "Fork: $!\n" unless defined $pid; - unless($pid) { - $pr->writer(); - $pw->reader(); - open(OUT,">&STDOUT"); - dup2($pw->fileno(),0); - dup2($pr->fileno(),1); - $pr->close(); - $pw->close(); - my @par = (); - @par = ("-p",$rev) if defined $rev; - - # loose detection of merges - # based on the commit msg - foreach my $rx (@mergerx) { - if ($message =~ $rx) { - my $mparent = $1; - if ($mparent eq 'HEAD') { $mparent = $opt_o }; - if ( -e "$git_dir/refs/heads/$mparent") { - $mparent = get_headref($mparent, $git_dir); - push @par, '-p', $mparent; - print OUT "Merge parent branch: $mparent\n" if $opt_v; - } - } + my $pid = open(C,"-|"); + die "Cannot fork: $!" unless defined $pid; + unless($pid) { + exec("git-write-tree"); + die "Cannot exec git-write-tree: $!\n"; } + chomp(my $tree = ); + length($tree) == 40 + or die "Cannot get tree id ($tree): $!\n"; + close(C) + or die "Error running git-write-tree: $?\n"; + print "Tree ID $tree\n" if $opt_v; + + my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n"; + my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n"; + $pid = fork(); + die "Fork: $!\n" unless defined $pid; + unless($pid) { + $pr->writer(); + $pw->reader(); + open(OUT,">&STDOUT"); + dup2($pw->fileno(),0); + dup2($pr->fileno(),1); + $pr->close(); + $pw->close(); + + my @par = (); + @par = ("-p",$rev) if defined $rev; + + # loose detection of merges + # based on the commit msg + foreach my $rx (@mergerx) { + if ($message =~ $rx) { + my $mparent = $1; + if ($mparent eq 'HEAD') { $mparent = $opt_o }; + if ( -e "$git_dir/refs/heads/$mparent") { + $mparent = get_headref($mparent, $git_dir); + push @par, '-p', $mparent; + print OUT "Merge parent branch: $mparent\n" if $opt_v; + } + } + } - exec("env", - "GIT_AUTHOR_NAME=$author_name", - "GIT_AUTHOR_EMAIL=$author_email", - "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), - "GIT_COMMITTER_NAME=$author_name", - "GIT_COMMITTER_EMAIL=$author_email", - "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), - "git-commit-tree", $tree,@par); - die "Cannot exec git-commit-tree: $!\n"; - } - $pw->writer(); - $pr->reader(); + exec("env", + "GIT_AUTHOR_NAME=$author_name", + "GIT_AUTHOR_EMAIL=$author_email", + "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), + "GIT_COMMITTER_NAME=$author_name", + "GIT_COMMITTER_EMAIL=$author_email", + "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), + "git-commit-tree", $tree,@par); + die "Cannot exec git-commit-tree: $!\n"; + } + $pw->writer(); + $pr->reader(); - $message =~ s/[\s\n]+\z//; + $message =~ s/[\s\n]+\z//; - print $pw "$message\n" - or die "Error writing to git-commit-tree: $!\n"; - $pw->close(); + print $pw "$message\n" + or die "Error writing to git-commit-tree: $!\n"; + $pw->close(); - print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v; - chomp(my $cid = <$pr>); - length($cid) == 40 - or die "Cannot get commit id ($cid): $!\n"; - print "Commit ID $cid\n" if $opt_v; - $pr->close(); + print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v; + chomp($cid = <$pr>); + length($cid) == 40 + or die "Cannot get commit id ($cid): $!\n"; + print "Commit ID $cid\n" if $opt_v; + $pr->close(); - waitpid($pid,0); - die "Error running git-commit-tree: $?\n" if $?; + waitpid($pid,0); + die "Error running git-commit-tree: $?\n" if $?; + } - if(defined $dest) { + if(not defined $dest) { + print "... no known parent\n" if $opt_v; + } elsif(not $tag) { print "Writing to refs/heads/$dest\n" if $opt_v; - open(C,">$git_dir/refs/heads/$dest") and + open(C,">$git_dir/refs/heads/$dest") and print C ("$cid\n") and close(C) or die "Cannot write branch $dest for update: $!\n"; - } else { - print "... no known parent\n" if $opt_v; } - $branches{$branch}{"LAST"} = $cid; - $branches{$branch}{$revision} = $cid; - $last_rev = $cid; - print BRANCHES "$revision $branch $cid\n"; - print "DONE: $revision $dest $cid\n" if $opt_v; if($tag) { my($in, $out) = ('',''); $last_rev = "-" if %$changed_paths; # the tag was 'complex', i.e. did not refer to a "real" revision - - $tag =~ tr/_/\./ if $opt_u; + + $dest =~ tr/_/\./ if $opt_u; my $pid = open2($in, $out, 'git-mktag'); print $out ("object $cid\n". "type commit\n". - "tag $tag\n". + "tag $dest\n". "tagger $author_name <$author_email>\n") and close($out) - or die "Cannot create tag object $tag: $!\n"; + or die "Cannot create tag object $dest: $!\n"; my $tagobj = <$in>; chomp $tagobj; if ( !close($in) or waitpid($pid, 0) != $pid or $? != 0 or $tagobj !~ /^[0123456789abcdef]{40}$/ ) { - die "Cannot create tag object $tag: $!\n"; + die "Cannot create tag object $dest: $!\n"; } - - open(C,">$git_dir/refs/tags/$tag") - or die "Cannot create tag $tag: $!\n"; - print C "$tagobj\n" - or die "Cannot write tag $tag: $!\n"; + open(C,">$git_dir/refs/tags/$dest") and + print C ("$tagobj\n") and close(C) - or die "Cannot write tag $tag: $!\n"; + or die "Cannot create tag $branch: $!\n"; - print "Created tag '$tag' on '$branch'\n" if $opt_v; + print "Created tag '$dest' on '$branch'\n" if $opt_v; } + $branches{$branch}{"LAST"} = $cid; + $branches{$branch}{$revision} = $cid; + $last_rev = $cid; + print BRANCHES "$revision $branch $cid\n"; + print "DONE: $revision $dest $cid\n" if $opt_v; } my ($changed_paths, $revision, $author, $date, $message, $pool) = @_; @@ -601,7 +652,7 @@ sub _commit_all { ($changed_paths, $revision, $author, $date, $message, $pool) = @_; my %p; while(my($path,$action) = each %$changed_paths) { - $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev ]; + $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev, $path ]; } $changed_paths = \%p; } @@ -622,9 +673,14 @@ sub commit_all { } } -while(++$current_rev < $svn->{'maxrev'}) { +while(++$current_rev <= $svn->{'maxrev'}) { $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,""); commit_all(); + if($opt_l and not --$opt_l) { + print STDERR "Stopping, because there is a memory leak (in the SVN library).\n"; + print STDERR "Please repeat this command; it will continue safely\n"; + last; + } } @@ -638,7 +694,7 @@ if (defined $orig_git_index) { # Now switch back to the branch we were in before all of this happened if($orig_branch) { - print "DONE\n" if $opt_v; + print "DONE\n" if $opt_v and (not defined $opt_l or $opt_l > 0); system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") if $forward_master; unless ($opt_i) { @@ -647,7 +703,7 @@ if($orig_branch) { } } else { $orig_branch = "master"; - print "DONE; creating $orig_branch branch\n" if $opt_v; + print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0); system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") unless -f "$git_dir/refs/heads/master"; unlink("$git_dir/HEAD");