Resurrect git-rerere to contrib/examples

It is handy to have a copy readily available for checking regressions.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/contrib/examples/git-rerere.perl b/contrib/examples/git-rerere.perl
new file mode 100755
index 0000000..4f69209
--- /dev/null
+++ b/contrib/examples/git-rerere.perl
@@ -0,0 +1,284 @@
+#!/usr/bin/perl
+#
+# REuse REcorded REsolve.  This tool records a conflicted automerge
+# result and its hand resolution, and helps to resolve future
+# automerge that results in the same conflict.
+#
+# To enable this feature, create a directory 'rr-cache' under your
+# .git/ directory.
+
+use Digest;
+use File::Path;
+use File::Copy;
+
+my $git_dir = $::ENV{GIT_DIR} || ".git";
+my $rr_dir = "$git_dir/rr-cache";
+my $merge_rr = "$git_dir/rr-cache/MERGE_RR";
+
+my %merge_rr = ();
+
+sub read_rr {
+	if (!-f $merge_rr) {
+		%merge_rr = ();
+		return;
+	}
+	my $in;
+	local $/ = "\0";
+	open $in, "<$merge_rr" or die "$!: $merge_rr";
+	while (<$in>) {
+		chomp;
+		my ($name, $path) = /^([0-9a-f]{40})\t(.*)$/s;
+		$merge_rr{$path} = $name;
+	}
+	close $in;
+}
+
+sub write_rr {
+	my $out;
+	open $out, ">$merge_rr" or die "$!: $merge_rr";
+	for my $path (sort keys %merge_rr) {
+		my $name = $merge_rr{$path};
+		print $out "$name\t$path\0";
+	}
+	close $out;
+}
+
+sub compute_conflict_name {
+	my ($path) = @_;
+	my @side = ();
+	my $in;
+	open $in, "<$path"  or die "$!: $path";
+
+	my $sha1 = Digest->new("SHA-1");
+	my $hunk = 0;
+	while (<$in>) {
+		if (/^<<<<<<< .*/) {
+			$hunk++;
+			@side = ([], undef);
+		}
+		elsif (/^=======$/) {
+			$side[1] = [];
+		}
+		elsif (/^>>>>>>> .*/) {
+			my ($one, $two);
+			$one = join('', @{$side[0]});
+			$two = join('', @{$side[1]});
+			if ($two le $one) {
+				($one, $two) = ($two, $one);
+			}
+			$sha1->add($one);
+			$sha1->add("\0");
+			$sha1->add($two);
+			$sha1->add("\0");
+			@side = ();
+		}
+		elsif (@side == 0) {
+			next;
+		}
+		elsif (defined $side[1]) {
+			push @{$side[1]}, $_;
+		}
+		else {
+			push @{$side[0]}, $_;
+		}
+	}
+	close $in;
+	return ($sha1->hexdigest, $hunk);
+}
+
+sub record_preimage {
+	my ($path, $name) = @_;
+	my @side = ();
+	my ($in, $out);
+	open $in, "<$path"  or die "$!: $path";
+	open $out, ">$name" or die "$!: $name";
+
+	while (<$in>) {
+		if (/^<<<<<<< .*/) {
+			@side = ([], undef);
+		}
+		elsif (/^=======$/) {
+			$side[1] = [];
+		}
+		elsif (/^>>>>>>> .*/) {
+			my ($one, $two);
+			$one = join('', @{$side[0]});
+			$two = join('', @{$side[1]});
+			if ($two le $one) {
+				($one, $two) = ($two, $one);
+			}
+			print $out "<<<<<<<\n";
+			print $out $one;
+			print $out "=======\n";
+			print $out $two;
+			print $out ">>>>>>>\n";
+			@side = ();
+		}
+		elsif (@side == 0) {
+			print $out $_;
+		}
+		elsif (defined $side[1]) {
+			push @{$side[1]}, $_;
+		}
+		else {
+			push @{$side[0]}, $_;
+		}
+	}
+	close $out;
+	close $in;
+}
+
+sub find_conflict {
+	my $in;
+	local $/ = "\0";
+	my $pid = open($in, '-|');
+	die "$!" unless defined $pid;
+	if (!$pid) {
+		exec(qw(git ls-files -z -u)) or die "$!: ls-files";
+	}
+	my %path = ();
+	my @path = ();
+	while (<$in>) {
+		chomp;
+		my ($mode, $sha1, $stage, $path) =
+		    /^([0-7]+) ([0-9a-f]{40}) ([123])\t(.*)$/s;
+		$path{$path} |= (1 << $stage);
+	}
+	close $in;
+	while (my ($path, $status) = each %path) {
+		if ($status == 14) { push @path, $path; }
+	}
+	return @path;
+}
+
+sub merge {
+	my ($name, $path) = @_;
+	record_preimage($path, "$rr_dir/$name/thisimage");
+	unless (system('git', 'merge-file', map { "$rr_dir/$name/${_}image" }
+		       qw(this pre post))) {
+		my $in;
+		open $in, "<$rr_dir/$name/thisimage" or
+		    die "$!: $name/thisimage";
+		my $out;
+		open $out, ">$path" or die "$!: $path";
+		while (<$in>) { print $out $_; }
+		close $in;
+		close $out;
+		return 1;
+	}
+	return 0;
+}
+
+sub garbage_collect_rerere {
+	# We should allow specifying these from the command line and
+	# that is why the caller gives @ARGV to us, but I am lazy.
+
+	my $cutoff_noresolve = 15; # two weeks
+	my $cutoff_resolve = 60; # two months
+	my @to_remove;
+	while (<$rr_dir/*/preimage>) {
+		my ($dir) = /^(.*)\/preimage$/;
+		my $cutoff = ((-f "$dir/postimage")
+			      ? $cutoff_resolve
+			      : $cutoff_noresolve);
+		my $age = -M "$_";
+		if ($cutoff <= $age) {
+			push @to_remove, $dir;
+		}
+	}
+	if (@to_remove) {
+		rmtree(\@to_remove);
+	}
+}
+
+-d "$rr_dir" || exit(0);
+
+read_rr();
+
+if (@ARGV) {
+	my $arg = shift @ARGV;
+	if ($arg eq 'clear') {
+		for my $path (keys %merge_rr) {
+			my $name = $merge_rr{$path};
+			if (-d "$rr_dir/$name" &&
+			    ! -f "$rr_dir/$name/postimage") {
+				rmtree(["$rr_dir/$name"]);
+			}
+		}
+		unlink $merge_rr;
+	}
+	elsif ($arg eq 'status') {
+		for my $path (keys %merge_rr) {
+			print $path, "\n";
+		}
+	}
+	elsif ($arg eq 'diff') {
+		for my $path (keys %merge_rr) {
+			my $name = $merge_rr{$path};
+			system('diff', ((@ARGV == 0) ? ('-u') : @ARGV),
+				'-L', "a/$path", '-L', "b/$path",
+				"$rr_dir/$name/preimage", $path);
+		}
+	}
+	elsif ($arg eq 'gc') {
+		garbage_collect_rerere(@ARGV);
+	}
+	else {
+		die "$0 unknown command: $arg\n";
+	}
+	exit 0;
+}
+
+my %conflict = map { $_ => 1 } find_conflict();
+
+# MERGE_RR records paths with conflicts immediately after merge
+# failed.  Some of the conflicted paths might have been hand resolved
+# in the working tree since then, but the initial run would catch all
+# and register their preimages.
+
+for my $path (keys %conflict) {
+	# This path has conflict.  If it is not recorded yet,
+	# record the pre-image.
+	if (!exists $merge_rr{$path}) {
+		my ($name, $hunk) = compute_conflict_name($path);
+		next unless ($hunk);
+		$merge_rr{$path} = $name;
+		if (! -d "$rr_dir/$name") {
+			mkpath("$rr_dir/$name", 0, 0777);
+			print STDERR "Recorded preimage for '$path'\n";
+			record_preimage($path, "$rr_dir/$name/preimage");
+		}
+	}
+}
+
+# Now some of the paths that had conflicts earlier might have been
+# hand resolved.  Others may be similar to a conflict already that
+# was resolved before.
+
+for my $path (keys %merge_rr) {
+	my $name = $merge_rr{$path};
+
+	# We could resolve this automatically if we have images.
+	if (-f "$rr_dir/$name/preimage" &&
+	    -f "$rr_dir/$name/postimage") {
+		if (merge($name, $path)) {
+			print STDERR "Resolved '$path' using previous resolution.\n";
+			# Then we do not have to worry about this path
+			# anymore.
+			delete $merge_rr{$path};
+			next;
+		}
+	}
+
+	# Let's see if we have resolved it.
+	(undef, my $hunk) = compute_conflict_name($path);
+	next if ($hunk);
+
+	print STDERR "Recorded resolution for '$path'.\n";
+	copy($path, "$rr_dir/$name/postimage");
+	# And we do not have to worry about this path anymore.
+	delete $merge_rr{$path};
+}
+
+# Write out the rest.
+write_rr();