blame: consult diff process for no-hunk detection

When a diff process is configured via diff.<driver>.process,
consult it during blame's per-commit diffing.  If the process
returns no hunks for a commit's changes to a file, treat the
commit as having no changes, causing blame to attribute lines
to earlier commits.

The consultation happens at the pass_blame_to_parent() callsite
using diff_process_fill_hunks(), matching how builtin_diff() in
diff.c uses the same function.  A new diff_hunks_xpp() variant
accepts a pre-populated xpparam_t so callers can pass external
hunks, while the existing diff_hunks() retains its original
signature and behavior.  The copy-detection callsite is
unaffected since it does not use the diff process.

The subprocess is long-running (one startup cost amortized
across the blame traversal), but each commit in the file's
history incurs a round-trip to the tool.

Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Michael Montalbo
2026-06-14 18:59:23 +00:00
committed by Junio C Hamano
parent ac69d22b32
commit 5533580575
2 changed files with 136 additions and 9 deletions

40
blame.c
View File

@@ -19,6 +19,8 @@
#include "tag.h"
#include "trace2.h"
#include "blame.h"
#include "diff-process.h"
#include "xdiff-interface.h"
#include "alloc.h"
#include "commit-slab.h"
#include "bloom.h"
@@ -314,17 +316,25 @@ static struct commit *fake_working_tree_commit(struct repository *r,
/*
 * Diff file_a against file_b using the caller-supplied xdiff
 * parameters in xpp.  hunk_func is installed as the hunk callback
 * and cb_data as the callback's private data.
 *
 * Returns whatever xdi_diff() returns; the callers visible in this
 * file treat a non-zero result as failure.
 */
static int diff_hunks_xpp(mmfile_t *file_a, mmfile_t *file_b,
			  xdl_emit_hunk_consume_func_t hunk_func,
			  void *cb_data, xpparam_t *xpp)
{
	xdemitconf_t emit_conf = { .hunk_func = hunk_func };
	xdemitcb_t emit_cb = { .priv = cb_data };

	return xdi_diff(file_a, file_b, xpp, &emit_conf, &emit_cb);
}
/*
 * Diff file_a against file_b with the xdiff option flags given in
 * xdl_opts.  This is a thin wrapper: it builds a zero-initialized
 * xpparam_t carrying only those flags and forwards hunk_func and
 * cb_data unchanged to diff_hunks_xpp().
 *
 * Returns the result of diff_hunks_xpp().
 */
static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
		      xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
{
	xpparam_t xpp = {0};

	xpp.flags = xdl_opts;
	return diff_hunks_xpp(file_a, file_b, hunk_func, cb_data, &xpp);
}
static const char *get_next_line(const char *start, const char *end)
@@ -1943,6 +1953,7 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
struct blame_origin *parent, int ignore_diffs)
{
mmfile_t file_p, file_o;
xpparam_t xpp = {0};
struct blame_chunk_cb_data d;
struct blame_entry *newdest = NULL;
@@ -1961,10 +1972,21 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
&sb->num_read_blob, ignore_diffs);
sb->num_get_patch++;
if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
die("unable to generate diff (%s -> %s)",
oid_to_hex(&parent->commit->object.oid),
oid_to_hex(&target->commit->object.oid));
xpp.flags = sb->xdl_opts;
/*
* If the diff process considers the files equivalent,
* skip the diff so blame looks past this commit.
*/
if (diff_process_fill_hunks(&sb->revs->diffopt, target->path,
&file_p, &file_o, &xpp)
!= DIFF_PROCESS_EQUIVALENT) {
if (diff_hunks_xpp(&file_p, &file_o, blame_chunk_cb,
&d, &xpp))
die("unable to generate diff (%s -> %s)",
oid_to_hex(&parent->commit->object.oid),
oid_to_hex(&target->commit->object.oid));
}
free(xpp.external_hunks);
/* The rest are the same as the parent */
blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, 0,
parent, target, 0);

View File

@@ -445,4 +445,109 @@ test_expect_success 'diff process skipped when tool omits capability' '
test_must_be_empty stderr
'
#
# Blame integration.
#
# Commit a ten-line file, then a second commit that changes lines 5-6
# and 9-10.  Run blame with the backend in fixed-hunk mode and check
# that lines 5-6 are attributed to the second commit while lines 9-10
# remain attributed to the first.
#
# Leaves blame-hunk.c and $CHANGE behind; a later test in this file
# reads both.  $BACKEND is set outside this test.
test_expect_success 'blame uses tool-provided hunks' '
cat >blame-hunk.c <<-\EOF &&
line1
line2
line3
line4
original5
original6
line7
line8
line9
line10
EOF
git add blame-hunk.c &&
git commit -m "add blame-hunk.c" &&
ORIG=$(git rev-parse --short HEAD) &&
cat >blame-hunk.c <<-\EOF &&
line1
line2
line3
line4
changed5
changed6
line7
line8
changed9
changed10
EOF
git add blame-hunk.c &&
git commit -m "change blame-hunk.c" &&
CHANGE=$(git rev-parse --short HEAD) &&
# With fixed-hunk mode the tool reports only lines 5-6 as changed,
# so blame should attribute lines 9-10 to the original commit
# even though the builtin diff would show them as changed.
git -c diff.cdiff.process="$BACKEND --mode=fixed-hunk" \
blame blame-hunk.c >actual &&
sed -n "9p" actual >line9 &&
sed -n "10p" actual >line10 &&
test_grep "$ORIG" line9 &&
test_grep "$ORIG" line10 &&
sed -n "5p" actual >line5 &&
sed -n "6p" actual >line6 &&
test_grep "$CHANGE" line5 &&
test_grep "$CHANGE" line6
'
# Commit blame.c, then a second commit that only moves the opening
# brace of main onto its own line.  Plain blame must mention the
# second commit.  With the backend in no-hunks mode, blame output must
# not mention the second commit and must mention the first.
#
# Leaves blame.c and $BLAME_COMMIT behind; the next test in this file
# reads both.  $BACKEND is set outside this test.
test_expect_success 'blame skips commits with no hunks from diff process' '
cat >blame.c <<-\EOF &&
int main(void) {
return 0;
}
EOF
git add blame.c &&
git commit -m "add blame.c" &&
ORIG_COMMIT=$(git rev-parse --short HEAD) &&
cat >blame.c <<-\EOF &&
int main(void)
{
return 0;
}
EOF
git add blame.c &&
git commit -m "reformat blame.c" &&
BLAME_COMMIT=$(git rev-parse --short HEAD) &&
# Without no-hunks mode, blame attributes the change.
git blame blame.c >without &&
test_grep "$BLAME_COMMIT" without &&
# With no-hunks mode, the process considers the files equivalent
# and blame skips the reformat commit, attributing to the original.
git -c diff.cdiff.process="$BACKEND --mode=no-hunks" \
blame blame.c >with &&
test_grep ! "$BLAME_COMMIT" with &&
test_grep "$ORIG_COMMIT" with
'
# With --no-ext-diff the configured diff process must not be used:
# blame output mentions the reformat commit, and backend.log does not
# exist afterwards.  Presumably the backend creates the file named by
# --log whenever it is started; confirm against the backend script.
#
# Relies on blame.c and $BLAME_COMMIT set up by the preceding test.
test_expect_success 'blame --no-ext-diff bypasses diff process' '
test_when_finished "rm -f backend.log" &&
git -c diff.cdiff.process="$BACKEND --mode=no-hunks --log=backend.log" \
blame --no-ext-diff blame.c >actual &&
# Without the process, blame attributes the reformat commit normally.
test_grep "$BLAME_COMMIT" actual &&
test_path_is_missing backend.log
'
# With --no-ext-diff, blame must ignore the configured diff process and
# use the builtin diff: both line 9 and line 10 of blame-hunk.c are
# attributed to the change commit, and backend.log does not exist
# afterwards.
#
# Relies on blame-hunk.c and $CHANGE set up by an earlier test in this
# file.
test_expect_success 'blame --no-ext-diff uses builtin hunks' '
	# fixed-hunk mode would narrow blame to lines 5-6, but
	# --no-ext-diff should bypass it and use the builtin diff.
	test_when_finished "rm -f backend.log" &&
	git -c diff.cdiff.process="$BACKEND --mode=fixed-hunk --log=backend.log" \
		blame --no-ext-diff blame-hunk.c >actual &&
	# Builtin diff attributes lines 9-10 to the change commit.
	sed -n "9p" actual >line9 &&
	sed -n "10p" actual >line10 &&
	test_grep "$CHANGE" line9 &&
	test_grep "$CHANGE" line10 &&
	test_path_is_missing backend.log
'
test_done