git-svn: eol_cp corner-case fixes
[git.git] / contrib / git-svn / git-svn.perl
index 43b50ec..8d2e7f7 100755 (executable)
@@ -8,8 +8,12 @@ use vars qw/   $AUTHOR $VERSION
                $GIT_SVN_INDEX $GIT_SVN
                $GIT_DIR $REV_DIR/;
 $AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
-$VERSION = '0.10.0';
-$GIT_DIR = $ENV{GIT_DIR} || "$ENV{PWD}/.git";
+$VERSION = '1.1.0-pre';
+
+use Cwd qw/abs_path/;
+$GIT_DIR = abs_path($ENV{GIT_DIR} || '.git');
+$ENV{GIT_DIR} = $GIT_DIR;
+
 # make sure the svn binary gives consistent output between locales and TZs:
 $ENV{TZ} = 'UTC';
 $ENV{LC_ALL} = 'C';
@@ -30,15 +34,20 @@ my $sha1_short = qr/[a-f\d]{4,40}/;
 my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
        $_find_copies_harder, $_l, $_version, $_upgrade, $_authors);
 my (@_branch_from, %tree_map, %users);
-my $_svn_co_url_revs;
+my ($_svn_co_url_revs, $_svn_pg_peg_revs);
 
 my %fc_opts = ( 'no-ignore-externals' => \$_no_ignore_ext,
                'branch|b=s' => \@_branch_from,
                'authors-file|A=s' => \$_authors );
+
+# yes, 'native' sets "\n".  Patches to fix this for non-*nix systems welcome:
+my %EOL = ( CR => "\015", LF => "\012", CRLF => "\015\012", native => "\012" );
+
 my %cmd = (
        fetch => [ \&fetch, "Download new revisions from SVN",
                        { 'revision|r=s' => \$_revision, %fc_opts } ],
-       init => [ \&init, "Initialize and fetch (import)", { } ],
+       init => [ \&init, "Initialize a repo for tracking" .
+                         " (requires URL argument)", { } ],
        commit => [ \&commit, "Commit git revisions to SVN",
                        {       'stdin|' => \$_stdin,
                                'edit|e' => \$_edit,
@@ -63,13 +72,31 @@ for (my $i = 0; $i < @ARGV; $i++) {
 
 my %opts = %{$cmd{$cmd}->[2]} if (defined $cmd);
 
+# convert GetOpt::Long specs into git-repo-config lookups of svn.<key>
+foreach my $o (keys %opts) {
+       my $v = $opts{$o};
+       my ($key) = ($o =~ /^([a-z\-]+)/);
+       $key =~ s/-//g;
+       my $arg = 'git-repo-config';
+       $arg .= ' --int' if ($o =~ /=i$/);
+       $arg .= ' --bool' if ($o !~ /=[sfi]$/);
+       if (ref $v eq 'ARRAY') {
+               chomp(my @tmp = `$arg --get-all svn.$key`);
+               @$v = @tmp if @tmp;
+       } else {
+               chomp(my $tmp = `$arg --get svn.$key`);
+               if ($tmp && !($arg =~ / --bool$/ && $tmp eq 'false')) {
+                       $$v = $tmp; # a --bool 'false' must stay unset
+               }
+       }
+}
+
 GetOptions(%opts, 'help|H|h' => \$_help,
                'version|V' => \$_version,
                'id|i=s' => \$GIT_SVN) or exit 1;
 
 $GIT_SVN ||= $ENV{GIT_SVN_ID} || 'git-svn';
 $GIT_SVN_INDEX = "$GIT_DIR/$GIT_SVN/index";
-$ENV{GIT_DIR} ||= $GIT_DIR;
 $SVN_URL = undef;
 $REV_DIR = "$GIT_DIR/$GIT_SVN/revs";
 $SVN_WC = "$GIT_DIR/$GIT_SVN/tree";
@@ -155,7 +182,7 @@ sub rebuild {
                # if we merged or otherwise started elsewhere, this is
                # how we break out of it
                next if (defined $SVN_UUID && ($uuid ne $SVN_UUID));
-               next if (defined $SVN_URL && ($url ne $SVN_URL));
+               next if (defined $SVN_URL && defined $url && ($url ne $SVN_URL));
 
                print "r$rev = $c\n";
                unless (defined $latest) {
@@ -184,7 +211,7 @@ sub rebuild {
                push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
                sys(@svn_up,"-r$newest_rev");
                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
-               git_addremove();
+               index_changes();
                exec('git-write-tree');
        }
        waitpid $pid, 0;
@@ -198,7 +225,8 @@ when you have upgraded your tools and habits to use refs/remotes/$GIT_SVN
 }
 
 sub init {
-       $SVN_URL = shift or croak "SVN repository location required\n";
+       $SVN_URL = shift or die "SVN repository location required " .
+                               "as a command-line argument\n";
        unless (-d $GIT_DIR) {
                sys('git-init-db');
        }
@@ -217,16 +245,15 @@ sub fetch {
        push @log_args, '--stop-on-copy' unless $_no_stop_copy;
 
        my $svn_log = svn_log_raw(@log_args);
-       @$svn_log = sort { $a->{revision} <=> $b->{revision} } @$svn_log;
 
-       my $base = shift @$svn_log or croak "No base revision!\n";
+       my $base = next_log_entry($svn_log) or croak "No base revision!\n";
        my $last_commit = undef;
        unless (-d $SVN_WC) {
                svn_cmd_checkout($SVN_URL,$base->{revision},$SVN_WC);
                chdir $SVN_WC or croak $!;
                read_uuid();
                $last_commit = git_commit($base, @parents);
-               assert_svn_wc_clean($base->{revision}, $last_commit);
+               assert_tree($last_commit);
        } else {
                chdir $SVN_WC or croak $!;
                read_uuid();
@@ -234,18 +261,26 @@ sub fetch {
        }
        my @svn_up = qw(svn up);
        push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
-       my $last_rev = $base->{revision};
-       foreach my $log_msg (@$svn_log) {
-               assert_svn_wc_clean($last_rev, $last_commit);
-               $last_rev = $log_msg->{revision};
-               sys(@svn_up,"-r$last_rev");
+       my $last = $base;
+       while (my $log_msg = next_log_entry($svn_log)) {
+               assert_tree($last_commit);
+               if ($last->{revision} >= $log_msg->{revision}) {
+                       croak "Out of order: last >= current: ",
+                               "$last->{revision} >= $log_msg->{revision}\n";
+               }
+               # Revert is needed for cases like:
+               # https://svn.musicpd.org/Jamming/trunk (r166:167), but
+               # I can't seem to reproduce something like that on a test...
+               sys(qw/svn revert -R ./);
+               assert_svn_wc_clean($last->{revision});
+               sys(@svn_up,"-r$log_msg->{revision}");
                $last_commit = git_commit($log_msg, $last_commit, @parents);
+               $last = $log_msg;
        }
-       assert_svn_wc_clean($last_rev, $last_commit);
        unless (-e "$GIT_DIR/refs/heads/master") {
                sys(qw(git-update-ref refs/heads/master),$last_commit);
        }
-       return pop @$svn_log;
+       return $last;
 }
 
 sub commit {
@@ -287,7 +322,6 @@ sub commit {
                $svn_current_rev = svn_commit_tree($svn_current_rev, $c);
        }
        print "Done committing ",scalar @revs," revisions to SVN\n";
-
 }
 
 sub show_ignore {
@@ -302,7 +336,7 @@ sub show_ignore {
        my %ign;
        File::Find::find({wanted=>sub{if(lstat $_ && -d _ && -d "$_/.svn"){
                s#^\./##;
-               @{$ign{$_}} = safe_qx(qw(svn propget svn:ignore),$_);
+               @{$ign{$_}} = svn_propget_base('svn:ignore', $_);
                }}, no_chdir=>1},'.');
 
        print "\n# /\n";
@@ -340,13 +374,11 @@ sub setup_git_svn {
 }
 
 sub assert_svn_wc_clean {
-       my ($svn_rev, $treeish) = @_;
+       my ($svn_rev) = @_;
        croak "$svn_rev is not an integer!\n" unless ($svn_rev =~ /^\d+$/);
-       croak "$treeish is not a sha1!\n" unless ($treeish =~ /^$sha1$/o);
        my $lcr = svn_info('.')->{'Last Changed Rev'};
        if ($svn_rev != $lcr) {
                print STDERR "Checking for copy-tree ... ";
-               # use
                my @diff = grep(/^Index: /,(safe_qx(qw(svn diff),
                                                "-r$lcr:$svn_rev")));
                if (@diff) {
@@ -362,7 +394,6 @@ sub assert_svn_wc_clean {
                print STDERR $_ foreach @status;
                croak;
        }
-       assert_tree($treeish);
 }
 
 sub assert_tree {
@@ -389,7 +420,7 @@ sub assert_tree {
                unlink $tmpindex or croak $!;
        }
        $ENV{GIT_INDEX_FILE} = $tmpindex;
-       git_addremove();
+       index_changes(1);
        chomp(my $tree = `git-write-tree`);
        if ($old_index) {
                $ENV{GIT_INDEX_FILE} = $old_index;
@@ -399,6 +430,7 @@ sub assert_tree {
        if ($tree ne $expected) {
                croak "Tree mismatch, Got: $tree, Expected: $expected\n";
        }
+       unlink $tmpindex;
 }
 
 sub parse_diff_tree {
@@ -535,7 +567,7 @@ sub precommit_check {
 sub svn_checkout_tree {
        my ($svn_rev, $treeish) = @_;
        my $from = file_to_s("$REV_DIR/$svn_rev");
-       assert_svn_wc_clean($svn_rev,$from);
+       assert_tree($from);
        print "diff-tree $from $treeish\n";
        my $pid = open my $diff_fh, '-|';
        defined $pid or croak $!;
@@ -708,49 +740,61 @@ sub svn_commit_tree {
        return fetch("$rev_committed=$commit")->{revision};
 }
 
+# run `svn log @log_args` to completion, capturing its output in a temporary
+# file (which is removed ASAP); returns the rewound file handle + parser state
 sub svn_log_raw {
        my (@log_args) = @_;
-       my $pid = open my $log_fh,'-|';
+       my $log_fh = IO::File->new_tmpfile or croak $!;
+       my $pid = fork;
        defined $pid or croak $!;
-
-       if ($pid == 0) {
+       if (!$pid) {
+               open STDOUT, '>&', $log_fh or croak $!; # child writes the log
                exec (qw(svn log), @log_args) or croak $!
        }
+       waitpid $pid, 0; # parent: the whole log is on disk once svn exits
+       croak if $?;
+       seek $log_fh, 0, 0 or croak $!;
+       return { state => 'sep', fh => $log_fh };
+}
+
+sub next_log_entry {
+       my $log = shift; # retval of svn_log_raw()
+       my $ret = undef;
+       my $fh = $log->{fh};
 
-       my @svn_log;
-       my $state = 'sep';
-       while (<$log_fh>) {
+       while (<$fh>) {
                chomp;
                if (/^\-{72}$/) {
-                       if ($state eq 'msg') {
-                               if ($svn_log[$#svn_log]->{lines}) {
-                                       $svn_log[$#svn_log]->{msg} .= $_."\n";
-                                       unless(--$svn_log[$#svn_log]->{lines}) {
-                                               $state = 'sep';
+                       if ($log->{state} eq 'msg') {
+                               if ($ret->{lines}) {
+                                       $ret->{msg} .= $_."\n";
+                                       unless(--$ret->{lines}) {
+                                               $log->{state} = 'sep';
                                        }
                                } else {
                                        croak "Log parse error at: $_\n",
-                                               $svn_log[$#svn_log]->{revision},
+                                               $ret->{revision},
                                                "\n";
                                }
                                next;
                        }
-                       if ($state ne 'sep') {
+                       if ($log->{state} ne 'sep') {
                                croak "Log parse error at: $_\n",
-                                       "state: $state\n",
-                                       $svn_log[$#svn_log]->{revision},
+                                       "state: $log->{state}\n",
+                                       $ret->{revision},
                                        "\n";
                        }
-                       $state = 'rev';
+                       $log->{state} = 'rev';
 
                        # if we have an empty log message, put something there:
-                       if (@svn_log) {
-                               $svn_log[$#svn_log]->{msg} ||= "\n";
-                               delete $svn_log[$#svn_log]->{lines};
+                       if ($ret) {
+                               $ret->{msg} ||= "\n";
+                               delete $ret->{lines};
+                               return $ret;
                        }
                        next;
                }
-               if ($state eq 'rev' && s/^r(\d+)\s*\|\s*//) {
+               if ($log->{state} eq 'rev' && s/^r(\d+)\s*\|\s*//) {
                        my $rev = $1;
                        my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3);
                        ($lines) = ($lines =~ /(\d+)/);
@@ -758,36 +802,34 @@ sub svn_log_raw {
                                        /(\d{4})\-(\d\d)\-(\d\d)\s
                                         (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
                                         or croak "Failed to parse date: $date\n";
-                       my %log_msg = ( revision => $rev,
+                       $ret = {        revision => $rev,
                                        date => "$tz $Y-$m-$d $H:$M:$S",
                                        author => $author,
                                        lines => $lines,
-                                       msg => '' );
+                                       msg => '' };
                        if (defined $_authors && ! defined $users{$author}) {
                                die "Author: $author not defined in ",
                                                "$_authors file\n";
                        }
-                       push @svn_log, \%log_msg;
-                       $state = 'msg_start';
+                       $log->{state} = 'msg_start';
                        next;
                }
                # skip the first blank line of the message:
-               if ($state eq 'msg_start' && /^$/) {
-                       $state = 'msg';
-               } elsif ($state eq 'msg') {
-                       if ($svn_log[$#svn_log]->{lines}) {
-                               $svn_log[$#svn_log]->{msg} .= $_."\n";
-                               unless (--$svn_log[$#svn_log]->{lines}) {
-                                       $state = 'sep';
+               if ($log->{state} eq 'msg_start' && /^$/) {
+                       $log->{state} = 'msg';
+               } elsif ($log->{state} eq 'msg') {
+                       if ($ret->{lines}) {
+                               $ret->{msg} .= $_."\n";
+                               unless (--$ret->{lines}) {
+                                       $log->{state} = 'sep';
                                }
                        } else {
                                croak "Log parse error at: $_\n",
-                                       $svn_log[$#svn_log]->{revision},"\n";
+                                       $ret->{revision},"\n";
                        }
                }
        }
-       close $log_fh or croak $?;
-       return \@svn_log;
+       return $ret;
 }
 
 sub svn_info {
@@ -815,13 +857,82 @@ sub svn_info {
 
 sub sys { system(@_) == 0 or croak $? }
 
-sub git_addremove {
-       system( "git-diff-files --name-only -z ".
-                               " | git-update-index --remove -z --stdin && ".
-               "git-ls-files -z --others ".
-                       "'--exclude-from=$GIT_DIR/$GIT_SVN/info/exclude'".
-                               " | git-update-index --add -z --stdin"
-               ) == 0 or croak $?
+# copy $from to $to, rewriting line endings per $to's svn:eol-style property
+sub eol_cp {
+       my ($from, $to) = @_;
+       my $es = svn_propget_base('svn:eol-style', $to);
+       open my $rfd, '<', $from or croak $!;
+       binmode $rfd or croak $!;
+       open my $wfd, '>', $to or croak $!;
+       binmode $wfd or croak $!;
+       $es =~ s/\s+$// if defined $es; # propget output may end in a newline
+       my $eol = defined $es ? $EOL{$es} : undef; # unknown style: plain copy
+       use bytes;
+       while (1) {
+               my ($buf, $r, $w, $t);
+               defined($r = sysread($rfd, $buf, 4096)) or croak $!;
+               return unless $r;
+               if ($eol) {
+                       # a CR ending the chunk may pair with the next byte(s)
+                       while ($buf =~ /\015\z/) {
+                               defined($r = sysread($rfd, $buf, 1,
+                                               length($buf))) or croak $!;
+                               last unless $r;
+                       }
+                       $buf =~ s/(?:\015\012|\015|\012)/$eol/gs;
+                       $r = length($buf);
+               }
+               for ($w = 0; $w < $r; $w += $t) {
+                       $t = syswrite($wfd, $buf, $r - $w, $w) or croak $!;
+               }
+       }
+}
+
+sub do_update_index { # pipe paths from @$z_cmd into git-update-index --$cmd
+       my ($z_cmd, $cmd, $no_text_base) = @_;
+
+       my $z = open my $p, '-|'; # child 1: produces the path list
+       defined $z or croak $!;
+       unless ($z) { exec @$z_cmd or croak $! }
+
+       my $pid = open my $ui, '|-'; # child 2: consumes the path list
+       defined $pid or croak $!;
+       unless ($pid) {
+               exec('git-update-index',"--$cmd",'-z','--stdin') or croak $!;
+       }
+       local $/ = "\0"; # paths are read and written NUL-terminated (-z)
+       while (my $x = <$p>) {
+               chomp $x;
+               if (!$no_text_base && lstat $x && ! -l _ &&
+                               svn_propget_base('svn:keywords', $x)) {
+                       my $mode = -x _ ? 0755 : 0644; # keep the exec bit
+                       my ($v,$d,$f) = File::Spec->splitpath($x);
+                       my $tb = File::Spec->catfile($d, '.svn', 'tmp',
+                                               'text-base',"$f.svn-base");
+                       $tb =~ s#^/##; # presumably for an empty $d - confirm
+                       unless (-f $tb) { # else use the regular text-base
+                               $tb = File::Spec->catfile($d, '.svn',
+                                               'text-base',"$f.svn-base");
+                               $tb =~ s#^/##;
+                       }
+                       unlink $x or croak $!; # recreate $x from $tb instead
+                       eol_cp($tb, $x);
+                       chmod(($mode &~ umask), $x) or croak $!;
+               }
+               print $ui $x,"\0";
+       }
+       close $ui or croak $!;
+}
+
+sub index_changes { # two do_update_index() passes: 'remove', then 'add'
+       my ($no_text_base) = @_;
+       my @passes = (
+               [ 'remove', [qw/git-diff-files --name-only -z/] ],
+               [ 'add', [qw/git-ls-files -z --others/,
+                       "--exclude-from=$GIT_DIR/$GIT_SVN/info/exclude"] ]);
+       foreach my $pass (@passes) {
+               do_update_index($pass->[1], $pass->[0], $no_text_base);
+       }
+}
 
 sub s_to_file {
@@ -850,11 +961,23 @@ sub assert_revision_unknown {
        }
 }
 
+# 1 if commits $x and $y start with the same 'tree <sha1>' line, else 0
+sub trees_eq {
+       my ($x, $y) = @_;
+       my ($tree_x) = safe_qx('git-cat-file','commit',$x);
+       my ($tree_y) = safe_qx('git-cat-file','commit',$y);
+       my $same = $tree_y eq $tree_x && $tree_x =~ /^tree $sha1\n$/
+                               && $tree_y =~ /^tree $sha1\n$/;
+       return 1 if $same;
+       print STDERR "Trees not equal: $tree_y != $tree_x\n";
+       return 0;
+}
+
 sub assert_revision_eq_or_unknown {
        my ($revno, $commit) = @_;
        if (-f "$REV_DIR/$revno") {
                my $current = file_to_s("$REV_DIR/$revno");
-               if ($commit ne $current) {
+               if (($commit ne $current) && !trees_eq($commit, $current)) {
                        croak "$REV_DIR/$revno already exists!\n",
                                "current: $current\nexpected: $commit\n";
                }
@@ -887,7 +1010,7 @@ sub git_commit {
        defined $pid or croak $!;
        if ($pid == 0) {
                $ENV{GIT_INDEX_FILE} = $GIT_SVN_INDEX;
-               git_addremove();
+               index_changes();
                chomp(my $tree = `git-write-tree`);
                croak if $?;
                if (exists $tree_map{$tree}) {
@@ -1019,6 +1142,9 @@ sub svn_compat_check {
        if (grep /usage: checkout URL\[\@REV\]/,@co_help) {
                $_svn_co_url_revs = 1;
        }
+       if (grep /\[TARGET\[\@REV\]\.\.\.\]/, `svn propget -h`) {
+               $_svn_pg_peg_revs = 1;
+       }
 
        # I really, really hope nobody hits this...
        unless (grep /stop-on-copy/, (safe_qx(qw(svn log -h)))) {
@@ -1098,13 +1224,23 @@ sub load_authors {
        close $authors or croak $!;
 }
 
+sub svn_propget_base { # svn propget $prop, at @BASE when peg revs are usable
+       my ($prop, $target) = @_;
+       $target .= '@BASE' if $_svn_pg_peg_revs;
+       return safe_qx('svn', 'propget', $prop, $target);
+}
+
 __END__
 
 Data structures:
 
-@svn_log = array of log_msg hashes
+$svn_log hashref (as returned by svn_log_raw)
+{
+       fh => file handle of the log file,
+       state => state of the log file parser (sep/msg/rev/msg_start...)
+}
 
-$log_msg hash
+$log_msg hashref as returned by next_log_entry($svn_log)
 {
        msg => 'whitespace-formatted log entry
 ',                                             # trailing newline is preserved