blame: make diff algorithm configurable

The diff algorithm used in 'git-blame(1)' is set to 'myers',
without the possibility to change it aside from the `--minimal` option.

There has been long-standing interest in changing the default diff
algorithm to "histogram", and Git 3.0 was floated as a possible occasion
for taking some steps towards that:

https://lore.kernel.org/git/xmqqed873vgn.fsf@gitster.g/

As a preparation for this move, it is worth making sure that the diff
algorithm is configurable where useful.

Make it configurable in the `git-blame(1)` command by introducing the
`--diff-algorithm` option and make it honor the `diff.algorithm` config
variable. Keep Myers diff as the default.

Signed-off-by: Antonin Delpeuch <antonin@delpeuch.eu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Antonin Delpeuch 2025-11-17 08:04:32 +00:00 committed by Junio C Hamano
parent 881793c4f7
commit ffffb987fc
6 changed files with 278 additions and 21 deletions

View File

@ -0,0 +1,20 @@
`--diff-algorithm=(patience|minimal|histogram|myers)`::
Choose a diff algorithm. The variants are as follows:
+
--
`default`;;
`myers`;;
The basic greedy diff algorithm. Currently, this is the default.
`minimal`;;
Spend extra time to make sure the smallest possible diff is
produced.
`patience`;;
Use "patience diff" algorithm when generating patches.
`histogram`;;
This algorithm extends the patience algorithm to "support
low-occurrence common elements".
--
+
For instance, if you configured the `diff.algorithm` variable to a
non-default value and want to use the default one, then you
have to use the `--diff-algorithm=default` option.

View File

@ -197,26 +197,7 @@ and starts with _<text>_, this algorithm attempts to prevent it from
appearing as a deletion or addition in the output. It uses the "patience appearing as a deletion or addition in the output. It uses the "patience
diff" algorithm internally. diff" algorithm internally.
`--diff-algorithm=(patience|minimal|histogram|myers)`:: include::diff-algorithm-option.adoc[]
Choose a diff algorithm. The variants are as follows:
+
--
`default`;;
`myers`;;
The basic greedy diff algorithm. Currently, this is the default.
`minimal`;;
Spend extra time to make sure the smallest possible diff is
produced.
`patience`;;
Use "patience diff" algorithm when generating patches.
`histogram`;;
This algorithm extends the patience algorithm to "support
low-occurrence common elements".
--
+
For instance, if you configured the `diff.algorithm` variable to a
non-default value and want to use the default one, then you
have to use `--diff-algorithm=default` option.
`--stat[=<width>[,<name-width>[,<count>]]]`:: `--stat[=<width>[,<name-width>[,<count>]]]`::
Generate a diffstat. By default, as much space as necessary Generate a diffstat. By default, as much space as necessary

View File

@ -85,6 +85,8 @@ include::blame-options.adoc[]
Ignore whitespace when comparing the parent's version and Ignore whitespace when comparing the parent's version and
the child's to find where the lines came from. the child's to find where the lines came from.
include::diff-algorithm-option.adoc[]
--abbrev=<n>:: --abbrev=<n>::
Instead of using the default 7+1 hexadecimal digits as the Instead of using the default 7+1 hexadecimal digits as the
abbreviated object name, use <m>+1 digits, where <m> is at abbreviated object name, use <m>+1 digits, where <m> is at

View File

@ -779,6 +779,19 @@ static int git_blame_config(const char *var, const char *value,
} }
} }
if (!strcmp(var, "diff.algorithm")) {
long diff_algorithm;
if (!value)
return config_error_nonbool(var);
diff_algorithm = parse_algorithm_value(value);
if (diff_algorithm < 0)
return error(_("unknown value for config '%s': %s"),
var, value);
xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
xdl_opts |= diff_algorithm;
return 0;
}
if (git_diff_heuristic_config(var, value, cb) < 0) if (git_diff_heuristic_config(var, value, cb) < 0)
return -1; return -1;
if (userdiff_config(var, value) < 0) if (userdiff_config(var, value) < 0)
@ -824,6 +837,38 @@ static int blame_move_callback(const struct option *option, const char *arg, int
return 0; return 0;
} }
/*
 * Option callback for the argument-less "--minimal" flag.
 *
 * option->value points at the int holding the xdiff flags. The bits
 * covered by XDF_DIFF_ALGORITHM_MASK are cleared first; XDF_NEED_MINIMAL
 * is then set unless the option was given in its negated form.
 *
 * Always returns 0.
 */
static int blame_diff_algorithm_minimal(const struct option *option,
					const char *arg, int unset)
{
	int *xdl_flags = option->value;
	/* A negated option contributes no bits at all. */
	int selected = unset ? 0 : XDF_NEED_MINIMAL;

	BUG_ON_OPT_ARG(arg);

	*xdl_flags = (*xdl_flags & ~XDF_DIFF_ALGORITHM_MASK) | selected;
	return 0;
}
/*
 * Option callback for "--diff-algorithm=<algorithm>".
 *
 * arg is handed to parse_algorithm_value(). A negative result is
 * reported through error() and that call's return value is passed
 * back to the caller. Otherwise the bits covered by
 * XDF_DIFF_ALGORITHM_MASK in the int that option->value points at are
 * replaced by the parsed value and 0 is returned.
 *
 * The option must not be negated (enforced by BUG_ON_OPT_NEG).
 */
static int blame_diff_algorithm_callback(const struct option *option,
					 const char *arg, int unset)
{
	int *xdl_flags = option->value;
	long algorithm = parse_algorithm_value(arg);

	BUG_ON_OPT_NEG(unset);

	if (algorithm >= 0) {
		/* Swap out whatever algorithm was selected before. */
		*xdl_flags = (*xdl_flags & ~XDF_DIFF_ALGORITHM_MASK) | algorithm;
		return 0;
	}

	return error(_("option diff-algorithm accepts \"myers\", "
		       "\"minimal\", \"patience\" and \"histogram\""));
}
static int is_a_rev(const char *name) static int is_a_rev(const char *name)
{ {
struct object_id oid; struct object_id oid;
@ -915,11 +960,16 @@ int cmd_blame(int argc,
OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR), OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL), OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE), OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
OPT_CALLBACK_F(0, "diff-algorithm", &xdl_opts, N_("<algorithm>"),
N_("choose a diff algorithm"),
PARSE_OPT_NONEG, blame_diff_algorithm_callback),
OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore <rev> when blaming")), OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore <rev> when blaming")),
OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from <file>")), OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from <file>")),
OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE), OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR), OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
OPT_BIT(0, "minimal", &xdl_opts, N_("spend extra cycles to find better match"), XDF_NEED_MINIMAL), OPT_CALLBACK_F(0, "minimal", &xdl_opts, NULL,
N_("spend extra cycles to find a better match"),
PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, blame_diff_algorithm_minimal),
OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from <file> instead of calling git-rev-list")), OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from <file> instead of calling git-rev-list")),
OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use <file>'s contents as the final image")), OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use <file>'s contents as the final image")),
OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback), OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback),

View File

@ -955,6 +955,7 @@ integration_tests = [
't8012-blame-colors.sh', 't8012-blame-colors.sh',
't8013-blame-ignore-revs.sh', 't8013-blame-ignore-revs.sh',
't8014-blame-ignore-fuzzy.sh', 't8014-blame-ignore-fuzzy.sh',
't8015-blame-diff-algorithm.sh',
't8020-last-modified.sh', 't8020-last-modified.sh',
't9001-send-email.sh', 't9001-send-email.sh',
't9002-column.sh', 't9002-column.sh',

203
t/t8015-blame-diff-algorithm.sh Executable file
View File

@ -0,0 +1,203 @@
#!/bin/sh
test_description='git blame with specific diff algorithm'
. ./test-lib.sh
# Build the two-commit history every later test blames against.
# Commit_1: file.c defines f() followed by g(); file.txt is four "x" lines.
# Commit_2: file.c defines g() followed by h(); file.txt becomes
# "x x x A B C D x E F G" (one entry per line).
# The author name is set per commit so blame output can be reduced to
# "Commit_1"/"Commit_2" markers.
test_expect_success setup '
cat >file.c <<-\EOF &&
int f(int x, int y)
{
if (x == 0)
{
return y;
}
return x;
}
int g(size_t u)
{
while (u < 30)
{
u++;
}
return u;
}
EOF
test_write_lines x x x x >file.txt &&
git add file.c file.txt &&
GIT_AUTHOR_NAME=Commit_1 git commit -m Commit_1 &&
cat >file.c <<-\EOF &&
int g(size_t u)
{
while (u < 30)
{
u++;
}
return u;
}
int h(int x, int y, int z)
{
if (z == 0)
{
return x;
}
return y;
}
EOF
test_write_lines x x x A B C D x E F G >file.txt &&
git add file.c file.txt &&
GIT_AUTHOR_NAME=Commit_2 git commit -m Commit_2
'
# Without any option or configuration, blame is expected to interleave
# Commit_1 and Commit_2 line by line across g() and h().
# The sed call reduces each blame line to "<author> <content>" and strips
# trailing blanks, in a single pass like the other tests in this file.
test_expect_success 'blame uses Myers diff algorithm by default' '
	cat >expected <<-\EOF &&
	Commit_2 int g(size_t u)
	Commit_1 {
	Commit_2 while (u < 30)
	Commit_1 {
	Commit_2 u++;
	Commit_1 }
	Commit_2 return u;
	Commit_1 }
	Commit_1
	Commit_2 int h(int x, int y, int z)
	Commit_1 {
	Commit_2 if (z == 0)
	Commit_1 {
	Commit_2 return x;
	Commit_1 }
	Commit_2 return y;
	Commit_1 }
	EOF
	git blame file.c >output &&
	sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
		-e "s/ *$//g" output >actual &&
	test_cmp expected actual
'
# With --diff-algorithm histogram, blame is expected to attribute all of
# g() to Commit_1 and the separator line plus all of h() to Commit_2.
# The sed call reduces each blame line to "<author> <content>" and strips
# trailing blanks, in a single pass like the other tests in this file.
test_expect_success 'blame honors --diff-algorithm option' '
	cat >expected <<-\EOF &&
	Commit_1 int g(size_t u)
	Commit_1 {
	Commit_1 while (u < 30)
	Commit_1 {
	Commit_1 u++;
	Commit_1 }
	Commit_1 return u;
	Commit_1 }
	Commit_2
	Commit_2 int h(int x, int y, int z)
	Commit_2 {
	Commit_2 if (z == 0)
	Commit_2 {
	Commit_2 return x;
	Commit_2 }
	Commit_2 return y;
	Commit_2 }
	EOF
	git blame file.c --diff-algorithm histogram >output &&
	sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
		-e "s/ *$//g" output >actual &&
	test_cmp expected actual
'
# Setting diff.algorithm=histogram through "git -c" (no command-line
# option) must yield the histogram attribution: g() entirely Commit_1,
# the separator line and h() entirely Commit_2.
test_expect_success 'blame honors diff.algorithm config variable' '
cat >expected <<-\EOF &&
Commit_1 int g(size_t u)
Commit_1 {
Commit_1 while (u < 30)
Commit_1 {
Commit_1 u++;
Commit_1 }
Commit_1 return u;
Commit_1 }
Commit_2
Commit_2 int h(int x, int y, int z)
Commit_2 {
Commit_2 if (z == 0)
Commit_2 {
Commit_2 return x;
Commit_2 }
Commit_2 return y;
Commit_2 }
EOF
git -c diff.algorithm=histogram blame file.c >output &&
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
-e "s/ *$//g" output >actual &&
test_cmp expected actual
'
# diff.algorithm=myers is configured, but --diff-algorithm histogram is
# given on the command line; the expected output is the histogram
# attribution, i.e. the option wins over the configuration.
test_expect_success 'blame gives priority to --diff-algorithm over diff.algorithm' '
cat >expected <<-\EOF &&
Commit_1 int g(size_t u)
Commit_1 {
Commit_1 while (u < 30)
Commit_1 {
Commit_1 u++;
Commit_1 }
Commit_1 return u;
Commit_1 }
Commit_2
Commit_2 int h(int x, int y, int z)
Commit_2 {
Commit_2 if (z == 0)
Commit_2 {
Commit_2 return x;
Commit_2 }
Commit_2 return y;
Commit_2 }
EOF
git -c diff.algorithm=myers blame file.c --diff-algorithm histogram >output &&
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
-e "s/ *$//g" output >actual &&
test_cmp expected actual
'
# With --minimal, the "x" that follows D in file.txt is expected to stay
# attributed to Commit_1; only A-D and E-G belong to Commit_2.
test_expect_success 'blame honors --minimal option' '
cat >expected <<-\EOF &&
Commit_1 x
Commit_1 x
Commit_1 x
Commit_2 A
Commit_2 B
Commit_2 C
Commit_2 D
Commit_1 x
Commit_2 E
Commit_2 F
Commit_2 G
EOF
git blame file.txt --minimal >output &&
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >actual &&
test_cmp expected actual
'
# --minimal followed by --diff-algorithm myers: the later option is
# expected to take effect, so the "x" after D is attributed to Commit_2
# here (unlike the --minimal-only expectation, where it is Commit_1).
test_expect_success 'blame respects the order of diff options' '
cat >expected <<-\EOF &&
Commit_1 x
Commit_1 x
Commit_1 x
Commit_2 A
Commit_2 B
Commit_2 C
Commit_2 D
Commit_2 x
Commit_2 E
Commit_2 F
Commit_2 G
EOF
git blame file.txt --minimal --diff-algorithm myers >output &&
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output >actual &&
test_cmp expected actual
'
test_done