blame: make diff algorithm configurable

The diff algorithm used in `git-blame(1)` is hard-coded to Myers, with
no way to change it other than through the `--minimal` option.

There has been long-standing interest in changing the default diff
algorithm to "histogram", and Git 3.0 was floated as a possible occasion
for taking some steps towards that:

https://lore.kernel.org/git/xmqqed873vgn.fsf@gitster.g/

As a preparation for this move, it is worth making sure that the diff
algorithm is configurable where useful.

Make it configurable in the `git-blame(1)` command by introducing the
`--diff-algorithm` option and making the command honor the
`diff.algorithm` config variable. Keep Myers diff as the default.

Signed-off-by: Antonin Delpeuch <antonin@delpeuch.eu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/Documentation/diff-algorithm-option.adoc b/Documentation/diff-algorithm-option.adoc
new file mode 100644
index 0000000..8e3a0b6
--- /dev/null
+++ b/Documentation/diff-algorithm-option.adoc
@@ -0,0 +1,20 @@
+`--diff-algorithm=(patience|minimal|histogram|myers)`::
+	Choose a diff algorithm. The variants are as follows:
++
+--
+   `default`;;
+   `myers`;;
+	The basic greedy diff algorithm. Currently, this is the default.
+   `minimal`;;
+	Spend extra time to make sure the smallest possible diff is
+	produced.
+   `patience`;;
+	Use "patience diff" algorithm when generating patches.
+   `histogram`;;
+	This algorithm extends the patience algorithm to "support
+	low-occurrence common elements".
+--
++
+For instance, if you configured the `diff.algorithm` variable to a
+non-default value and want to use the default one, then you
+have to use `--diff-algorithm=default` option.
diff --git a/Documentation/diff-options.adoc b/Documentation/diff-options.adoc
index ae31520..9cdad6f 100644
--- a/Documentation/diff-options.adoc
+++ b/Documentation/diff-options.adoc
@@ -197,26 +197,7 @@
 appearing as a deletion or addition in the output. It uses the "patience
 diff" algorithm internally.
 
-`--diff-algorithm=(patience|minimal|histogram|myers)`::
-	Choose a diff algorithm. The variants are as follows:
-+
---
-   `default`;;
-   `myers`;;
-	The basic greedy diff algorithm. Currently, this is the default.
-   `minimal`;;
-	Spend extra time to make sure the smallest possible diff is
-	produced.
-   `patience`;;
-	Use "patience diff" algorithm when generating patches.
-   `histogram`;;
-	This algorithm extends the patience algorithm to "support
-	low-occurrence common elements".
---
-+
-For instance, if you configured the `diff.algorithm` variable to a
-non-default value and want to use the default one, then you
-have to use `--diff-algorithm=default` option.
+include::diff-algorithm-option.adoc[]
 
 `--stat[=<width>[,<name-width>[,<count>]]]`::
 	Generate a diffstat. By default, as much space as necessary
diff --git a/Documentation/git-blame.adoc b/Documentation/git-blame.adoc
index e438d28..adcbb6f 100644
--- a/Documentation/git-blame.adoc
+++ b/Documentation/git-blame.adoc
@@ -85,6 +85,8 @@
 	Ignore whitespace when comparing the parent's version and
 	the child's to find where the lines came from.
 
+include::diff-algorithm-option.adoc[]
+
 --abbrev=<n>::
 	Instead of using the default 7+1 hexadecimal digits as the
 	abbreviated object name, use <m>+1 digits, where <m> is at
diff --git a/builtin/blame.c b/builtin/blame.c
index 2703820..27b513d 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -779,6 +779,19 @@ static int git_blame_config(const char *var, const char *value,
 		}
 	}
 
+	if (!strcmp(var, "diff.algorithm")) {
+		long diff_algorithm;
+		if (!value)
+			return config_error_nonbool(var);
+		diff_algorithm = parse_algorithm_value(value);
+		if (diff_algorithm < 0)
+			return error(_("unknown value for config '%s': %s"),
+				     var, value);
+		xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
+		xdl_opts |= diff_algorithm;
+		return 0;
+	}
+
 	if (git_diff_heuristic_config(var, value, cb) < 0)
 		return -1;
 	if (userdiff_config(var, value) < 0)
@@ -824,6 +837,38 @@ static int blame_move_callback(const struct option *option, const char *arg, int
 	return 0;
 }
 
+/*
+ * Clear the algorithm mask bits, then set XDF_NEED_MINIMAL unless negated.
+ */
+static int blame_diff_algorithm_minimal(const struct option *option,
+					const char *arg, int unset)
+{
+	int *xdl_flags = option->value;
+	int wanted = unset ? 0 : XDF_NEED_MINIMAL;
+
+	BUG_ON_OPT_ARG(arg);
+	*xdl_flags = (*xdl_flags & ~XDF_DIFF_ALGORITHM_MASK) | wanted;
+	return 0;
+}
+
+/*
+ * Replace the algorithm mask bits with the parsed value; fail if it is negative.
+ */
+static int blame_diff_algorithm_callback(const struct option *option,
+					 const char *arg, int unset)
+{
+	int *xdl_flags = option->value;
+	long algorithm = parse_algorithm_value(arg);
+
+	BUG_ON_OPT_NEG(unset);
+	if (algorithm < 0)
+		return error(_("option diff-algorithm accepts \"myers\", "
+			       "\"minimal\", \"patience\" and \"histogram\""));
+
+	*xdl_flags = (*xdl_flags & ~XDF_DIFF_ALGORITHM_MASK) | algorithm;
+	return 0;
+}
+
 static int is_a_rev(const char *name)
 {
 	struct object_id oid;
@@ -915,11 +960,16 @@ int cmd_blame(int argc,
 		OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
 		OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
 		OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
+		OPT_CALLBACK_F(0, "diff-algorithm", &xdl_opts, N_("<algorithm>"),
+			       N_("choose a diff algorithm"),
+			       PARSE_OPT_NONEG, blame_diff_algorithm_callback),
 		OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore <rev> when blaming")),
 		OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from <file>")),
 		OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
 		OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
-		OPT_BIT(0, "minimal", &xdl_opts, N_("spend extra cycles to find better match"), XDF_NEED_MINIMAL),
+		OPT_CALLBACK_F(0, "minimal", &xdl_opts, NULL,
+			       N_("spend extra cycles to find a better match"),
+			       PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, blame_diff_algorithm_minimal),
 		OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from <file> instead of calling git-rev-list")),
 		OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use <file>'s contents as the final image")),
 		OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback),
diff --git a/t/meson.build b/t/meson.build
index 401b24e..9f2fe7a 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -955,6 +955,7 @@
   't8012-blame-colors.sh',
   't8013-blame-ignore-revs.sh',
   't8014-blame-ignore-fuzzy.sh',
+  't8015-blame-diff-algorithm.sh',
   't8020-last-modified.sh',
   't9001-send-email.sh',
   't9002-column.sh',
diff --git a/t/t8015-blame-diff-algorithm.sh b/t/t8015-blame-diff-algorithm.sh
new file mode 100755
index 0000000..cd70953
--- /dev/null
+++ b/t/t8015-blame-diff-algorithm.sh
@@ -0,0 +1,203 @@
+#!/bin/sh
+
+test_description='git blame with specific diff algorithm'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+	cat >file.c <<-\EOF &&
+	int f(int x, int y)
+	{
+	  if (x == 0)
+	  {
+	    return y;
+	  }
+	  return x;
+	}
+
+	int g(size_t u)
+	{
+	  while (u < 30)
+	  {
+	    u++;
+	  }
+	  return u;
+	}
+	EOF
+	test_write_lines x x x x >file.txt &&
+	git add file.c file.txt &&
+	GIT_AUTHOR_NAME=Commit_1 git commit -m Commit_1 &&
+	# Commit_2 puts g() first, replaces f() with h(), and rewrites file.txt.
+	cat >file.c <<-\EOF &&
+	int g(size_t u)
+	{
+	  while (u < 30)
+	  {
+	    u++;
+	  }
+	  return u;
+	}
+
+	int h(int x, int y, int z)
+	{
+	  if (z == 0)
+	  {
+	    return x;
+	  }
+	  return y;
+	}
+	EOF
+	test_write_lines x x x A B C D x E F G >file.txt &&
+	git add file.c file.txt &&
+	GIT_AUTHOR_NAME=Commit_2 git commit -m Commit_2
+'
+
+test_expect_success 'blame uses Myers diff algorithm by default' '
+	cat >expected <<-\EOF &&
+	Commit_2 int g(size_t u)
+	Commit_1 {
+	Commit_2   while (u < 30)
+	Commit_1   {
+	Commit_2     u++;
+	Commit_1   }
+	Commit_2   return u;
+	Commit_1 }
+	Commit_1
+	Commit_2 int h(int x, int y, int z)
+	Commit_1 {
+	Commit_2   if (z == 0)
+	Commit_1   {
+	Commit_2     return x;
+	Commit_1   }
+	Commit_2   return y;
+	Commit_1 }
+	EOF
+	# Keep only "<author> <content>" per line and strip trailing spaces.
+	git blame file.c >output &&
+	sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
+	    -e "s/ *$//g" output >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'blame honors --diff-algorithm option' '
+	cat >expected <<-\EOF &&
+	Commit_1 int g(size_t u)
+	Commit_1 {
+	Commit_1   while (u < 30)
+	Commit_1   {
+	Commit_1     u++;
+	Commit_1   }
+	Commit_1   return u;
+	Commit_1 }
+	Commit_2
+	Commit_2 int h(int x, int y, int z)
+	Commit_2 {
+	Commit_2   if (z == 0)
+	Commit_2   {
+	Commit_2     return x;
+	Commit_2   }
+	Commit_2   return y;
+	Commit_2 }
+	EOF
+	# With histogram, all of g() stays with Commit_1 and the rest goes to Commit_2.
+	git blame file.c --diff-algorithm histogram >output &&
+	sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
+	    -e "s/ *$//g" output >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'blame honors diff.algorithm config variable' '
+	cat >expected <<-\EOF &&
+	Commit_1 int g(size_t u)
+	Commit_1 {
+	Commit_1   while (u < 30)
+	Commit_1   {
+	Commit_1     u++;
+	Commit_1   }
+	Commit_1   return u;
+	Commit_1 }
+	Commit_2
+	Commit_2 int h(int x, int y, int z)
+	Commit_2 {
+	Commit_2   if (z == 0)
+	Commit_2   {
+	Commit_2     return x;
+	Commit_2   }
+	Commit_2   return y;
+	Commit_2 }
+	EOF
+	# Histogram is selected through -c diff.algorithm only, with no blame option.
+	git -c diff.algorithm=histogram blame file.c >output &&
+	sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
+	    -e "s/ *$//g" output >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'blame gives priority to --diff-algorithm over diff.algorithm' '
+	cat >expected <<-\EOF &&
+	Commit_1 int g(size_t u)
+	Commit_1 {
+	Commit_1   while (u < 30)
+	Commit_1   {
+	Commit_1     u++;
+	Commit_1   }
+	Commit_1   return u;
+	Commit_1 }
+	Commit_2
+	Commit_2 int h(int x, int y, int z)
+	Commit_2 {
+	Commit_2   if (z == 0)
+	Commit_2   {
+	Commit_2     return x;
+	Commit_2   }
+	Commit_2   return y;
+	Commit_2 }
+	EOF
+	# Config says myers, the option says histogram; the expected output is the histogram one.
+	git -c diff.algorithm=myers blame file.c --diff-algorithm histogram >output &&
+	sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
+	    -e "s/ *$//g" output >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'blame honors --minimal option' '
+	cat >expected <<-\EOF &&
+	Commit_1 x
+	Commit_1 x
+	Commit_1 x
+	Commit_2 A
+	Commit_2 B
+	Commit_2 C
+	Commit_2 D
+	Commit_1 x
+	Commit_2 E
+	Commit_2 F
+	Commit_2 G
+	EOF
+	# Expect the "x" between D and E to stay attributed to Commit_1.
+	git blame file.txt --minimal >output &&
+	sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >actual &&
+	test_cmp expected actual
+'
+
+test_expect_success 'blame respects the order of diff options' '
+	cat >expected <<-\EOF &&
+	Commit_1 x
+	Commit_1 x
+	Commit_1 x
+	Commit_2 A
+	Commit_2 B
+	Commit_2 C
+	Commit_2 D
+	Commit_2 x
+	Commit_2 E
+	Commit_2 F
+	Commit_2 G
+	EOF
+	# --diff-algorithm myers comes after --minimal; expect "x" after D on Commit_2.
+	git blame file.txt --minimal --diff-algorithm myers >output &&
+	sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >actual &&
+	test_cmp expected actual
+'
+
+test_done