3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
11 git-archimport-script -i <archive>/<branch> [<archive>/<branch>]
12 [ <archive>/<branch> ]
14 The script expects you to provide the key roots where it can start the
15 import from an 'initial import' or 'tag' type of Arch commit. It will
16 then follow all the branching and tagging within the provided roots.
18 It will die if it sees branches that have different roots.
22 - keep track of merged patches, and mark a git merge when it happens
23 - smarter rules to parse the archive history "up" and "down"
24 - be able to continue an import where we left off
25 - audit shell-escaping of filenames
29 Add print in front of the shell commands invoked via backticks.
37 use File::Temp qw(tempfile);
38 use File::Path qw(mkpath);
39 use File::Basename qw(basename dirname);
40 use String::ShellQuote;
44 use POSIX qw(strftime dup2);
45 use Data::Dumper qw/ Dumper /;
48 $SIG{'PIPE'}="IGNORE";
51 our($opt_h,$opt_v, $opt_T,
52 $opt_C,$opt_t, $opt_i);
56 Usage: ${\basename $0} # fetch/update GIT from Arch
57 [ -h ] [ -v ] [ -i ] [ -T ]
58 [ -C GIT_repository ] [ -t tempdir ]
59 repository/arch-branch [ repository/arch-branch] ...
64 getopts("hviC:t:") or usage();
67 @ARGV >= 1 or usage();
68 my @arch_roots = @ARGV;
72 $tmp .= '/git-archimport/';
74 my $git_tree = $opt_C;
78 my @psets = (); # the collection
80 foreach my $root (@arch_roots) {
81 my ($arepo, $abranch) = split(m!/!, $root);
82 open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |"
83 or die "Problems with tla abrowse: $!";
85 my %ps = (); # the current one
92 # first record padded w 8 spaces
95 # store the record we just captured
97 my %temp = %ps; # break references
98 push (@psets, \%temp);
102 my ($id, $type) = split(m/\s{3}/, $_);
107 if ($type =~ m/^\(simple changeset\)/) {
109 } elsif ($type eq '(initial import)') {
111 } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
115 warn "Unknown type $type";
121 # 10 leading spaces or more
122 # indicate commit metadata
125 if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
127 my ($date, $authoremail) = split(m/\s{2,}/, $_);
129 $ps{date} =~ s/\bGMT$//; # strip off trailign GMT
130 if ($ps{date} =~ m/\b\w+$/) {
131 warn 'Arch dates not in GMT?! - imported dates will be wrong';
134 $authoremail =~ m/^(.+)\s(\S+)$/;
140 } elsif ($lastseen eq 'date') {
141 # the only hint is position
142 # subject is after date
146 } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
148 $lastseen = 'merges';
150 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
151 push (@{$ps{merges}}, $_);
153 warn 'more metadata after merges!?';
160 my %temp = %ps; # break references
161 push (@psets, \%temp);
165 } # end foreach $root
167 ## Order patches by time
168 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
170 #print Dumper \@psets;
173 ## TODO cleanup irrelevant patches
174 ## and put an initial import
177 if ($opt_i) { # initial import
178 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
179 print "Starting import from $psets[0]{id}\n";
181 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
188 my $lastbranch = branchname($psets[0]{id}); # only good for initial import
189 my $importseen = $opt_i ? 0 : 1; # start at 1 if opt_i
191 foreach my $ps (@psets) {
193 $ps->{branch} = branchname($ps->{id});
196 # ensure we have a clean state
198 if (`git diff-files`) {
199 die "Unclean tree when about to process $ps->{id} " .
200 " - did we fail to commit cleanly before?";
205 # create the branch if needed
207 if ($ps->{type} eq 'i' && $importseen) {
208 die "Should not have more than one 'Initial import' per GIT import";
211 unless ($opt_i && !$importseen) { # skip for first commit
212 if ( -e ".git/refs/heads/$ps->{branch}") {
213 # we know about this branch
214 `git checkout $ps->{branch}`;
216 # new branch! we need to verify a few things
217 die "Branch on a non-tag!" unless $ps->{type} eq 't';
218 my $branchpoint = ptag($ps->{tag});
219 die "Tagging from unknown id unsupported: $ps->{tag}"
222 # find where we are supposed to branch from
223 `git checkout -b $ps->{branch} $branchpoint`;
230 # Apply the import/changeset/merge into the working tree
232 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
234 apply_import($ps) or die $!;
235 } elsif ($ps->{type} eq 's') {
240 # prepare update git's index, based on what arch knows
241 # about the pset, resolve parents, etc
245 my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`;
246 die "Error in cat-archive-log: $!" if $?;
248 # parselog will git-add/rm files
249 # and generally prepare things for the commit
250 # NOTE: parselog will shell-quote filenames!
251 my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
252 my $logmessage = "$sum\n$msg";
255 # imports don't give us good info
256 # on added files. Shame on them
257 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
258 `find . -type f -print0 | grep -zv '^./.git' | xargs -0 -l100 git-update-cache --add`;
259 `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-cache --remove`;
264 my @slice = splice(@$add, 0, 100);
265 my $slice = join(' ', @slice);
266 `git-update-cache --add $slice`;
267 die "Error in git-update-cache --add: $!" if $?;
271 foreach my $file (@$del) {
272 unlink $file or die "Problems deleting $file : $!";
275 my @slice = splice(@$del, 0, 100);
276 my $slice = join(' ', @slice);
277 `git-update-cache --remove $slice`;
278 die "Error in git-update-cache --remove: $!" if $?;
281 if (@$ren) { # renamed
283 die "Odd number of entries in rename!?";
287 my $from = pop @$ren;
290 unless (-d dirname($to)) {
291 mkpath(dirname($to)); # will die on err
293 #print "moving $from $to";
295 die "Error renaming $from $to : $!" if $?;
296 `git-update-cache --remove $from`;
297 die "Error in git-update-cache --remove: $!" if $?;
298 `git-update-cache --add $to`;
299 die "Error in git-update-cache --add: $!" if $?;
303 if (@$mod) { # must be _after_ renames
305 my @slice = splice(@$mod, 0, 100);
306 my $slice = join(' ', @slice);
307 `git-update-cache $slice`;
308 die "Error in git-update-cache: $!" if $?;
312 # warn "errors when running git-update-cache! $!";
313 $tree = `git-write-tree`;
314 die "cannot write tree $!" if $?;
322 if ( -e ".git/refs/heads/$ps->{branch}") {
323 if (open HEAD, "<.git/refs/heads/$ps->{branch}") {
329 if ($ps->{type} eq 's') {
330 warn "Could not find the right head for the branch $ps->{branch}";
335 my $par = join (' ', @par);
338 # Commit, tag and clean state
341 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
342 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
343 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
344 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
345 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
346 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
348 my ($pid, $commit_rh, $commit_wh);
349 $commit_rh = 'commit_rh';
350 $commit_wh = 'commit_wh';
352 $pid = open2(*READER, *WRITER, "git-commit-tree $tree $par")
354 print WRITER $logmessage; # write
356 my $commitid = <READER>; # read
359 waitpid $pid,0; # close;
361 if (length $commitid != 40) {
362 die "Something went wrong with the commit! $! $commitid";
367 open HEAD, ">.git/refs/heads/$ps->{branch}";
368 print HEAD $commitid;
370 unlink ('.git/HEAD');
371 symlink("refs/heads/$ps->{branch}",".git/HEAD");
374 ptag($ps->{id}, $commitid); # private tag
375 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
376 tag($ps->{id}, $commitid);
378 print " * Committed $ps->{id}\n";
379 print " + tree $tree\n";
380 print " + commit $commitid\n";
381 # print " + commit date is $ps->{date} \n";
387 my @parts = split(m/--/, $id);
388 return join('--', @parts[0..1]);
393 my $bname = branchname($ps->{id});
397 `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
398 die "Cannot get import: $!" if $?;
399 `rsync -v --archive --delete --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
400 die "Cannot rsync import:$!" if $?;
402 `rm -fr $tmp/import`;
403 die "Cannot remove tempdir: $!" if $?;
415 `tla get-changeset -A $ps->{repo} $ps->{id} $tmp/changeset`;
416 die "Cannot get changeset: $!" if $?;
419 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
420 # this can be sped up considerably by doing
421 # (find | xargs cat) | patch
422 # but that cna get mucked up by patches
423 # with missing trailing newlines or the standard
424 # 'missing newline' flag in the patch - possibly
425 # produced with an old/buggy diff.
426 # slow and safe, we invoke patch once per patchfile
427 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
428 die "Problem applying patches! $!" if $?;
431 # apply changed binary files
432 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
433 foreach my $mod (@modified) {
436 $orig =~ s/\.modified$//; # lazy
437 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
438 #print "rsync -p '$mod' '$orig'";
439 `rsync -p $mod ./$orig`;
440 die "Problem applying binary changes! $!" if $?;
445 `rsync --archive --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
447 # deleted files are hinted from the commitlog processing
449 `rm -fr $tmp/changeset`;
454 # A log entry looks like
455 # Revision: moodle-org--moodle--1.3.3--patch-15
456 # Archive: arch-eduforge@catalyst.net.nz--2004
457 # Creator: Penny Leach <penny@catalyst.net.nz>
458 # Date: Wed May 25 14:15:34 NZST 2005
459 # Standard-date: 2005-05-25 02:15:34 GMT
460 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
461 # lang/de/.arch-ids/block_html.php.id
462 # New-directories: lang/de/help/questionnaire
463 # lang/de/help/questionnaire/.arch-ids
464 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
465 # db_sears.sql db/db_sears.sql
466 # Removed-files: lang/be/docs/.arch-ids/release.html.id
467 # lang/be/docs/.arch-ids/releaseold.html.id
468 # Modified-files: admin/cron.php admin/delete.php
469 # admin/editor.html backup/lib.php backup/restore.php
470 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
471 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
474 # Updating yadda tadda tadda madda
479 my (@add, @del, @mod, @ren, @kw, $sum, $msg );
481 if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
483 @add = split(m/\s+/s, $files);
486 if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
488 @del = split(m/\s+/s, $files);
491 if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
493 @mod = split(m/\s+/s, $files);
496 if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
498 @ren = split(m/\s+/s, $files);
502 if ($log =~ m/^Summary:(.+?)$/m ) {
509 if ($log =~ m/\n\n(.+)$/s) {
517 foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
519 while (my $t = pop @$ref) {
520 next unless length ($t);
521 next if $t =~ m!\{arch\}/!;
522 next if $t =~ m!\.arch-ids/!;
523 next if $t =~ m!\.arch-inventory$!;
524 push (@tmp, shell_quote($t));
529 #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
530 return ($sum, $msg, \@add, \@del, \@mod, \@ren);
535 my ($tag, $commit) = @_;
537 $tag = shell_quote($tag);
540 open(C,">.git/refs/tags/$tag")
541 or die "Cannot create tag $tag: $!\n";
543 or die "Cannot write tag $tag: $!\n";
545 or die "Cannot write tag $tag: $!\n";
546 print "Created tag '$tag' on '$commit'\n" if $opt_v;
548 open(C,"<.git/refs/tags/$tag")
549 or die "Cannot read tag $tag: $!\n";
552 die "Error reading tag $tag: $!\n" unless length $commit == 40;
554 or die "Cannot read tag $tag: $!\n";
559 # write/read a private tag
560 # reads fail softly if the tag isn't there
562 my ($tag, $commit) = @_;
564 $tag = shell_quote($tag);
566 unless (-d '.git/archimport/tags') {
567 mkpath('.git/archimport/tags');
570 if ($commit) { # write
571 open(C,">.git/archimport/tags/$tag")
572 or die "Cannot create tag $tag: $!\n";
574 or die "Cannot write tag $tag: $!\n";
576 or die "Cannot write tag $tag: $!\n";
578 # if the tag isn't there, return 0
579 unless ( -s ".git/archimport/tags/$tag") {
580 warn "Could not find tag $tag -- perhaps it isn't in the repos we have?\n"
584 open(C,"<.git/archimport/tags/$tag")
585 or die "Cannot read tag $tag: $!\n";
588 die "Error reading tag $tag: $!\n" unless length $commit == 40;
590 or die "Cannot read tag $tag: $!\n";