Merge branch 'pw/xdiff-shrink-memory-consumption' into seen

Shrink wasted memory in Myers diff that does not account for common
prefix and suffix removal.

* pw/xdiff-shrink-memory-consumption:
  xdiff: reduce the size of array
  xprepare: simplify error handling
  xdiff: cleanup xdl_clean_mmatch()
  xdiff: reduce size of action arrays
This commit is contained in:
Junio C Hamano
2026-04-09 15:11:39 -07:00

View File

@@ -171,12 +171,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
if (!XDL_CALLOC_ARRAY(xdf->changed, xdf->nrec + 2))
goto abort;
if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
(XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1))
goto abort;
}
xdf->changed += 1;
xdf->nreff = 0;
xdf->dstart = 0;
@@ -197,8 +191,9 @@ void xdl_free_env(xdfenv_t *xe) {
}
static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) {
static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t len) {
ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1;
ptrdiff_t s = 0, e = len - 1;
/*
* Limits the window that is examined during the similar-lines
@@ -273,16 +268,19 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
uint8_t *action1 = NULL, *action2 = NULL;
bool need_min = !!(cf->flags & XDF_NEED_MINIMAL);
int ret = 0;
ptrdiff_t off = xdf1->dstart;
ptrdiff_t len1 = xdf1->dend - off + 1;
ptrdiff_t len2 = xdf2->dend - off + 1;
/*
* Create temporary arrays that will help us decide if
* changed[i] should remain false, or become true.
*/
if (!XDL_CALLOC_ARRAY(action1, xdf1->nrec + 1)) {
ret = -1;
goto cleanup;
}
if (!XDL_CALLOC_ARRAY(action2, xdf2->nrec + 1)) {
if (!XDL_CALLOC_ARRAY(action1, len1) ||
!XDL_CALLOC_ARRAY(action2, len2) ||
!XDL_ALLOC_ARRAY(xdf1->reference_index, len1) ||
!XDL_ALLOC_ARRAY(xdf2->reference_index, len2))
{
ret = -1;
goto cleanup;
}
@@ -299,8 +297,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
/*
* Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
*/
for (i = xdf1->dstart; i <= xdf1->dend; i++) {
size_t mph1 = xdf1->recs[i].minimal_perfect_hash;
for (i = 0; i < len1; i++) {
size_t mph1 = xdf1->recs[i + off].minimal_perfect_hash;
rcrec = cf->rcrecs[mph1];
nm = rcrec ? rcrec->len2 : 0;
if (nm == 0)
@@ -311,8 +309,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
action1[i] = INVESTIGATE;
}
for (i = xdf2->dstart; i <= xdf2->dend; i++) {
size_t mph2 = xdf2->recs[i].minimal_perfect_hash;
for (i = 0; i < len2; i++) {
size_t mph2 = xdf2->recs[i + off].minimal_perfect_hash;
rcrec = cf->rcrecs[mph2];
nm = rcrec ? rcrec->len1 : 0;
if (nm == 0)
@@ -328,25 +326,25 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
* false, or become true.
*/
xdf1->nreff = 0;
for (i = xdf1->dstart; i <= xdf1->dend; i++) {
for (i = 0; i < len1; i++) {
if (action1[i] == KEEP ||
(action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) {
xdf1->reference_index[xdf1->nreff++] = i;
(action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, len1))) {
xdf1->reference_index[xdf1->nreff++] = i + off;
/* changed[i] remains false, i.e. keep */
} else {
xdf1->changed[i] = true;
xdf1->changed[i + off] = true;
/* i.e. discard */
}
}
xdf2->nreff = 0;
for (i = xdf2->dstart; i <= xdf2->dend; i++) {
for (i = 0; i < len2; i++) {
if (action2[i] == KEEP ||
(action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) {
xdf2->reference_index[xdf2->nreff++] = i;
(action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, len2))) {
xdf2->reference_index[xdf2->nreff++] = i + off;
/* changed[i] remains false, i.e. keep */
} else {
xdf2->changed[i] = true;
xdf2->changed[i + off] = true;
/* i.e. discard */
}
}