xdiff: support external hunks via xpparam_t

Add two new xpparam_t fields (external_hunks, external_hunks_nr)
that let callers supply pre-computed hunks.  When set, xdl_diff()
populates the changed[] arrays from these hunks instead of running
the diff algorithm, then continues through compaction and emission
as usual.

Validate supplied hunks before use: reject out-of-bounds line
numbers, overlapping or out-of-order hunks, negative counts, and
violations of the synchronization invariant (unchanged line counts
must match between files).  On validation failure, fall back to
the builtin diff algorithm; this re-runs xdl_prepare_env() since
the first call may have dirtied the changed[] arrays.

Skip trim_common_tail() in xdi_diff() when external hunks are
present, since external hunks reference line numbers in the
original content.

Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Michael Montalbo
2026-05-29 20:48:14 +00:00
committed by Junio C Hamano
parent c69baaf57b
commit c47aed4f0f
5 changed files with 114 additions and 4 deletions

View File

@@ -124,7 +124,12 @@ int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t co
if (mf1->size > MAX_XDIFF_SIZE || mf2->size > MAX_XDIFF_SIZE)
return -1;
if (!xecfg->ctxlen && !(xecfg->flags & XDL_EMIT_FUNCCONTEXT))
/*
* External hunks reference line numbers in the original content;
* trimming the tail would change line counts and invalidate them.
*/
if (!xpp->external_hunks &&
!xecfg->ctxlen && !(xecfg->flags & XDL_EMIT_FUNCCONTEXT))
trim_common_tail(&a, &b);
return xdl_diff(&a, &b, xpp, xecfg, xecb);

View File

@@ -78,6 +78,15 @@ typedef struct s_mmbuffer {
long size;
} mmbuffer_t;
/*
 * One hunk of an externally computed diff.
 *
 * The *_start members are 1-based line numbers, following the unified
 * diff convention; the *_count members give how many lines the hunk
 * covers on that side.
 */
struct xdl_hunk {
	long old_start;
	long old_count;
	long new_start;
	long new_count;
};
typedef struct s_xpparam {
unsigned long flags;
@@ -88,6 +97,10 @@ typedef struct s_xpparam {
/* See Documentation/diff-options.adoc. */
char **anchors;
size_t anchors_nr;
/* Externally computed hunks: bypass the diff algorithm. Owned by caller. */
struct xdl_hunk *external_hunks;
size_t external_hunks_nr;
} xpparam_t;
typedef struct s_xdemitcb {

View File

@@ -1085,16 +1085,97 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
}
}
/*
 * Mark lines as changed according to caller-supplied hunks rather than
 * running the diff algorithm.
 *
 * Hunks are processed in array order.  A hunk is rejected when a count
 * is negative, a start is below 1, its range does not fit inside the
 * file's records, or it begins before the previous hunk ended.  Once
 * every hunk has been applied, the number of lines left unmarked must
 * be the same on both sides.
 *
 * Returns 0 when all hunks were accepted, -1 otherwise.  On failure the
 * changed[] arrays may already carry marks from earlier hunks.
 */
static int xdl_populate_hunks_from_external(xdfenv_t *xe,
					    struct xdl_hunk *hunks,
					    size_t nr_hunks)
{
	const long old_nrec = (long)xe->xdf1.nrec;
	const long new_nrec = (long)xe->xdf2.nrec;
	long old_floor = 0, new_floor = 0;	/* end of the previous hunk */
	long old_marked = 0, new_marked = 0;	/* lines flagged so far */
	long pos;
	size_t idx = 0;

	/*
	 * Start from a clean slate: xdl_prepare_env() may have left
	 * marks in changed[] (via xdl_cleanup_records()), and only the
	 * external hunks should end up flagged.
	 */
	xdl_clear_changed(&xe->xdf1);
	xdl_clear_changed(&xe->xdf2);

	while (idx < nr_hunks) {
		const struct xdl_hunk *cur = hunks + idx;
		long old_stop, new_stop;

		if (cur->old_count < 0 || cur->new_count < 0 ||
		    cur->old_start < 1 || cur->new_start < 1)
			return -1;

		/*
		 * The range fits when start + count - 1 <= nrec; the
		 * comparison is arranged so the sum is never formed
		 * before it is known to be in range.  A zero count
		 * (pure insertion or deletion) is accepted for any
		 * start up to nrec + 1, i.e. the position just past
		 * the last line.
		 */
		if (old_nrec - cur->old_start + 1 < cur->old_count)
			return -1;
		if (new_nrec - cur->new_start + 1 < cur->new_count)
			return -1;

		/* Hunks may touch the previous one but must not overlap it. */
		if (cur->old_start < old_floor || cur->new_start < new_floor)
			return -1;

		/* First 1-based line after the hunk on each side. */
		old_stop = cur->old_start + cur->old_count;
		new_stop = cur->new_start + cur->new_count;

		for (pos = cur->old_start; pos < old_stop; pos++)
			xe->xdf1.changed[pos - 1] = true;
		for (pos = cur->new_start; pos < new_stop; pos++)
			xe->xdf2.changed[pos - 1] = true;

		old_floor = old_stop;
		new_floor = new_stop;
		old_marked += cur->old_count;
		new_marked += cur->new_count;
		idx++;
	}

	/*
	 * Both sides must keep the same number of unchanged lines;
	 * otherwise xdl_build_script() would walk off one array.
	 */
	return (old_nrec - old_marked == new_nrec - new_marked) ? 0 : -1;
}
int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
xdchange_t *xscr;
xdfenv_t xe;
emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
return -1;
if (xpp->external_hunks) {
if (xdl_prepare_env(mf1, mf2, xpp, &xe) < 0)
return -1;
if (xdl_populate_hunks_from_external(&xe,
xpp->external_hunks,
xpp->external_hunks_nr) == 0)
goto diff_done;
xdl_free_env(&xe);
}
if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0)
return -1;
diff_done:
if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
xdl_build_script(&xe, &xscr) < 0) {

View File

@@ -471,3 +471,13 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
return 0;
}
/*
 * Mark every line of xdf as unchanged.
 *
 * The zeroed region starts one slot before changed[0] and extends one
 * slot past changed[nrec - 1], so the sentinel entries changed[-1] and
 * changed[nrec] that xdl_change_compact() relies on during backward
 * scans are reset as well.
 */
void xdl_clear_changed(xdfile_t *xdf)
{
	/* nrec line flags plus one sentinel on either end */
	size_t nbytes = (xdf->nrec + 2) * sizeof(bool);

	memset(xdf->changed - 1, 0, nbytes);
}

View File

@@ -28,6 +28,7 @@
int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdfenv_t *xe);
void xdl_free_env(xdfenv_t *xe);
void xdl_clear_changed(xdfile_t *xdf);