From 73510d45ab28e425b3173fe3069ec9333c4c7438 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Jun 2026 16:41:17 +0200 Subject: [PATCH 1/7] diff: stop truncating the deflated-binary-diff size on Windows Continue the size_t evacuation around large object handling: with deflate_it() and the locals around it widened, the cast_size_t_to_ulong() shim the prior diff_delta() widening had to leave behind in emit_binary_diff_body() goes away. deflate_it() is file-static; the only callers are the two in emit_binary_diff_body() already touched here. emit_diff_symbol() formats the resulting sizes via uintmax_t / %"PRIuMAX", so the diff output is not affected; only the per-process upper bound on a binary patch chunk that this function can address grows beyond 4 GiB on Windows. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- diff.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/diff.c b/diff.c index c14f69719b..d0be7c8f50 100644 --- a/diff.c +++ b/diff.c @@ -3606,8 +3606,8 @@ static int checkdiff_consume(void *priv, char *line, unsigned long len) } static unsigned char *deflate_it(char *data, - unsigned long size, - unsigned long *result_size) + size_t size, + size_t *result_size) { size_t bound; unsigned char *deflated; @@ -3636,10 +3636,10 @@ static void emit_binary_diff_body(struct diff_options *o, void *delta; void *deflated; void *data; - unsigned long orig_size; - unsigned long delta_size; - unsigned long deflate_size; - unsigned long data_size; + size_t orig_size; + size_t delta_size; + size_t deflate_size; + size_t data_size; /* We could do deflated delta, or we could do just deflated two, * whichever is smaller. 
@@ -3647,11 +3647,9 @@ static void emit_binary_diff_body(struct diff_options *o, delta = NULL; deflated = deflate_it(two->ptr, two->size, &deflate_size); if (one->size && two->size) { - size_t delta_size_st = 0; delta = diff_delta(one->ptr, one->size, two->ptr, two->size, - &delta_size_st, deflate_size); - delta_size = cast_size_t_to_ulong(delta_size_st); + &delta_size, deflate_size); if (delta) { void *to_free = delta; orig_size = delta_size; From 71c8cacb3f80161c4c36375196326591bd9af60e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Jun 2026 16:44:00 +0200 Subject: [PATCH 2/7] convert: widen gather_convert_stats() helpers to size_t Prep for the upcoming read_blob_data_from_index() widening, whose callers in convert.c feed the size they receive straight into these two helpers. Both are file-static, so the change is contained. Also fixes a small pre-existing narrowing on the get_wt_convert_stats_ascii() path, where strbuf.len (size_t) was passed to an unsigned long parameter. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- convert.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convert.c b/convert.c index 036506842c..74d452b0de 100644 --- a/convert.c +++ b/convert.c @@ -102,7 +102,7 @@ static int convert_is_binary(const struct text_stat *stats) return 0; } -static unsigned int gather_convert_stats(const char *data, unsigned long size) +static unsigned int gather_convert_stats(const char *data, size_t size) { struct text_stat stats; int ret = 0; @@ -119,7 +119,7 @@ static unsigned int gather_convert_stats(const char *data, unsigned long size) return ret; } -static const char *gather_convert_stats_ascii(const char *data, unsigned long size) +static const char *gather_convert_stats_ascii(const char *data, size_t size) { unsigned int convert_stats = gather_convert_stats(data, size); From f95e2e4a5f94678264166924f816116452442407 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Jun 2026 17:00:12 +0200 Subject: [PATCH 3/7] read-cache: stop truncating index blob sizes on Windows Continue the size_t evacuation. read_blob_data_from_index() reads the blob through the size_t odb_read_object() API but writes the size back through an unsigned long out-parameter, silently truncating anything past 4 GiB on Windows. Widen the out-parameter, drop the cast_size_t_to_ulong() shim, and move the matching locals in the two convert.c callers and the one in attr.c. Their downstream consumers (gather_convert_stats() widened in the prior commit and read_attr_from_buf() already size_t) take the new type directly. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- attr.c | 2 +- convert.c | 4 ++-- read-cache-ll.h | 2 +- read-cache.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/attr.c b/attr.c index c61472a4e6..b9852d8587 100644 --- a/attr.c +++ b/attr.c @@ -793,7 +793,7 @@ static struct attr_stack *read_attr_from_index(struct index_state *istate, { struct attr_stack *stack = NULL; char *buf; - unsigned long size; + size_t size; int sparse_dir_pos = -1; if (!istate) diff --git a/convert.c b/convert.c index 74d452b0de..77f06fcfdb 100644 --- a/convert.c +++ b/convert.c @@ -141,7 +141,7 @@ const char *get_cached_convert_stats_ascii(struct index_state *istate, const char *path) { const char *ret; - unsigned long sz; + size_t sz; void *data = read_blob_data_from_index(istate, path, &sz); ret = gather_convert_stats_ascii(data, sz); free(data); @@ -223,7 +223,7 @@ static void check_global_conv_flags_eol(const char *path, static int has_crlf_in_index(struct index_state *istate, const char *path) { - unsigned long sz; + size_t sz; void *data; const char *crp; int has_crlf = 0; diff --git a/read-cache-ll.h b/read-cache-ll.h index 2c8b4b21b1..a3643dce24 100644 --- a/read-cache-ll.h +++ b/read-cache-ll.h @@ -411,7 +411,7 @@ int chmod_index_entry(struct index_state *, struct cache_entry *ce, char flip); int ce_same_name(const struct cache_entry *a, const struct cache_entry *b); void set_object_name_for_intent_to_add_entry(struct cache_entry *ce); int index_name_is_other(struct index_state *, const char *, int); -void *read_blob_data_from_index(struct index_state *, const char *, unsigned long *); +void *read_blob_data_from_index(struct index_state *, const char *, size_t *); /* do stat comparison even if CE_VALID is true */ #define CE_MATCH_IGNORE_VALID 01 diff --git a/read-cache.c b/read-cache.c index 21ca58beea..8be8912f16 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3459,7 +3459,7 @@ int index_name_is_other(struct index_state *istate, const char 
*name, } void *read_blob_data_from_index(struct index_state *istate, - const char *path, unsigned long *size) + const char *path, size_t *size) { int pos, len; size_t sz; @@ -3490,7 +3490,7 @@ void *read_blob_data_from_index(struct index_state *istate, return NULL; } if (size) - *size = cast_size_t_to_ulong(sz); + *size = sz; return data; } From ebb6368e915906d431f2913fb352528213919ca8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Jun 2026 17:33:36 +0200 Subject: [PATCH 4/7] xdiff-interface: widen buffer_is_binary() size parameter to size_t Prep for the widenings of its callers, where size-receiving locals will become size_t (combine-diff's result_size in the immediately following commit, struct diff_filespec.size in a later topic). Body caps the parameter at 8000 anyway, so the type change is mechanical. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- xdiff-interface.c | 2 +- xdiff-interface.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xdiff-interface.c b/xdiff-interface.c index db6938689f..18e37d2479 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -195,7 +195,7 @@ void read_mmblob(mmfile_t *ptr, struct object_database *odb, } #define FIRST_FEW_BYTES 8000 -int buffer_is_binary(const char *ptr, unsigned long size) +int buffer_is_binary(const char *ptr, size_t size) { if (FIRST_FEW_BYTES < size) size = FIRST_FEW_BYTES; diff --git a/xdiff-interface.h b/xdiff-interface.h index ce54e1c0e0..41fa1d7562 100644 --- a/xdiff-interface.h +++ b/xdiff-interface.h @@ -49,7 +49,7 @@ int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2, int read_mmfile(mmfile_t *ptr, const char *filename); void read_mmblob(mmfile_t *ptr, struct object_database *odb, const struct object_id *oid); -int buffer_is_binary(const char *ptr, unsigned long size); +int buffer_is_binary(const char *ptr, size_t size); void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line, int cflags); void xdiff_clear_find_func(xdemitconf_t *xecfg); From 
a551c408927eeed50b904da7b65f0c6c91b0699a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Jun 2026 18:07:31 +0200 Subject: [PATCH 5/7] combine-diff: stop truncating combined-diff blob sizes on Windows Continue the size_t evacuation. With buffer_is_binary() widened in the prior commit, every consumer that the size flows into in combine-diff.c is size_t-ready, so widen grab_blob()'s out-param outright and move the matching locals at its three call sites together. grab_blob()'s body collapses to a direct odb_read_object(&size) since the bridge variable is no longer needed. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- combine-diff.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/combine-diff.c b/combine-diff.c index fb72174918..4915bf335d 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -304,7 +304,7 @@ static struct lline *coalesce_lines(struct lline *base, int *lenbase, static char *grab_blob(struct repository *r, const struct object_id *oid, unsigned int mode, - unsigned long *size, struct userdiff_driver *textconv, + size_t *size, struct userdiff_driver *textconv, const char *path) { char *blob; @@ -325,9 +325,7 @@ static char *grab_blob(struct repository *r, *size = fill_textconv(r, textconv, df, &blob); free_filespec(df); } else { - size_t size_st = 0; - blob = odb_read_object(r->objects, oid, &type, &size_st); - *size = cast_size_t_to_ulong(size_st); + blob = odb_read_object(r->objects, oid, &type, size); if (!blob) die(_("unable to read %s"), oid_to_hex(oid)); if (type != OBJ_BLOB) @@ -431,7 +429,7 @@ static void combine_diff(struct repository *r, xdemitconf_t xecfg; mmfile_t parent_file; struct combine_diff_state state; - unsigned long sz; + size_t sz; if (result_deleted) return; /* result deleted */ @@ -1015,7 +1013,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent, struct rev_info *rev) { struct diff_options *opt = &rev->diffopt; - unsigned long result_size, cnt, 
lno; + size_t result_size, cnt, lno; int result_deleted = 0; char *result, *cp; struct sline *sline; /* survived lines */ @@ -1134,7 +1132,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent, is_binary = buffer_is_binary(result, result_size); for (i = 0; !is_binary && i < num_parent; i++) { char *buf; - unsigned long size; + size_t size; buf = grab_blob(opt->repo, &elem->parent[i].oid, elem->parent[i].mode, From 518d12a44f79d9334db06e51ff8502d09335cb99 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Jun 2026 19:26:00 +0200 Subject: [PATCH 6/7] diff: widen textconv_object() size out-param to size_t Continue the size_t evacuation. textconv_object() fills its out-parameter from fill_textconv()'s size_t return through an unsigned long*; widen the API to match, then take advantage of the new shape where callers can. cat-file's 'c' and batch-mode 'c' branches lose their size_ul bridge variables (one site becomes a direct call, the other collapses an if/else into a single negated condition that reads as "try textconv, fall back to a raw read"). blame.c likewise drops the file_size_st bridge in fill_origin_blob() and hoists final_buf_size_st to bracket both branches in setup_scoreboard(). The latter keeps a cast_size_t_to_ulong() shim because struct blame_scoreboard.final_buf_size is still unsigned long; that field is its own topic. log.c just widens its local from unsigned long to size_t. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- blame.c | 21 ++++++++------------- builtin/cat-file.c | 13 ++++--------- builtin/log.c | 2 +- diff.c | 2 +- diff.h | 2 +- 5 files changed, 15 insertions(+), 25 deletions(-) diff --git a/blame.c b/blame.c index 126e232416..6cdeabd633 100644 --- a/blame.c +++ b/blame.c @@ -238,7 +238,7 @@ static struct commit *fake_working_tree_commit(struct repository *r, struct stat st; const char *read_from; char *buf_ptr; - unsigned long buf_len; + size_t buf_len; if (contents_from) { if (stat(contents_from, &st) < 0) @@ -1034,20 +1034,17 @@ static void fill_origin_blob(struct diff_options *opt, { if (!o->file.ptr) { enum object_type type; - unsigned long file_size; + size_t file_size; (*num_read_blob)++; if (opt->flags.allow_textconv && textconv_object(opt->repo, o->path, o->mode, &o->blob_oid, 1, &file->ptr, &file_size)) ; - else { - size_t file_size_st = 0; + else file->ptr = odb_read_object(the_repository->objects, &o->blob_oid, &type, - &file_size_st); - file_size = cast_size_t_to_ulong(file_size_st); - } + &file_size); file->size = file_size; if (!file->ptr) @@ -2864,22 +2861,20 @@ void setup_scoreboard(struct blame_scoreboard *sb, sb->final_buf_size = o->file.size; } else { + size_t final_buf_size_st = 0; o = get_origin(sb->final, sb->path); if (fill_blob_sha1_and_mode(sb->repo, o)) die(_("no such path %s in %s"), sb->path, final_commit_name); if (sb->revs->diffopt.flags.allow_textconv && textconv_object(sb->repo, sb->path, o->mode, &o->blob_oid, 1, (char **) &sb->final_buf, - &sb->final_buf_size)) + &final_buf_size_st)) ; - else { - size_t final_buf_size_st = 0; + else sb->final_buf = odb_read_object(the_repository->objects, &o->blob_oid, &type, &final_buf_size_st); - sb->final_buf_size = - cast_size_t_to_ulong(final_buf_size_st); - } + sb->final_buf_size = cast_size_t_to_ulong(final_buf_size_st); if (!sb->final_buf) die(_("cannot read blob %s for path %s"), diff --git a/builtin/cat-file.c b/builtin/cat-file.c 
index d6ef8414ee..912e1ef403 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -186,11 +186,9 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) case 'c': { - unsigned long size_ul = 0; int textconv_ret = textconv_object(the_repository, path, obj_context.mode, &oid, 1, - &buf, &size_ul); - size = size_ul; + &buf, &size); if (textconv_ret) break; } @@ -413,12 +411,9 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d oid_to_hex(oid), data->rest); } else if (opt->transform_mode == 'c') { enum object_type type; - unsigned long size_ul = 0; - if (textconv_object(the_repository, - data->rest, 0100644, oid, - 1, &contents, &size_ul)) - size = size_ul; - else + if (!textconv_object(the_repository, + data->rest, 0100644, oid, + 1, &contents, &size)) contents = odb_read_object(the_repository->objects, oid, &type, &size); if (!contents) diff --git a/builtin/log.c b/builtin/log.c index d027ce1e0b..2f5142e888 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c struct object_id oidc; struct object_context obj_context = {0}; char *buf; - unsigned long size; + size_t size; fflush(rev->diffopt.file); if (!rev->diffopt.flags.textconv_set_via_cmdline || diff --git a/diff.c b/diff.c index d0be7c8f50..f0b4ffe512 100644 --- a/diff.c +++ b/diff.c @@ -7845,7 +7845,7 @@ int textconv_object(struct repository *r, const struct object_id *oid, int oid_valid, char **buf, - unsigned long *buf_size) + size_t *buf_size) { struct diff_filespec *df; struct userdiff_driver *textconv; diff --git a/diff.h b/diff.h index bb5cddaf34..ab52ca80c3 100644 --- a/diff.h +++ b/diff.h @@ -757,7 +757,7 @@ int textconv_object(struct repository *repo, const char *path, unsigned mode, const struct object_id *oid, int oid_valid, - char **buf, unsigned long *buf_size); + char **buf, size_t *buf_size); int parse_rename_score(const char **cp_p); From 
4bfc0989753d0ecd32aecc1dcb1e4f1f58331954 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Jun 2026 19:38:13 +0200 Subject: [PATCH 7/7] diffcore: widen struct diff_filespec.size to size_t Continue the size_t evacuation. The struct field already receives its writes from a size_t-shaped source (xsize_t(st.st_size), strbuf.len, fill_textconv()'s return, odb_read_object_info_extended() via oi.sizep), so on Windows it was already truncating anything past 4 GiB silently on the strbuf and textconv paths and loudly through cast_size_t_to_ulong() on the odb path. Switch the field to size_t. In diff_populate_filespec(), point oi.sizep at the field directly and drop both cast_size_t_to_ulong() shims and the size_st bridge they fed. Downstream consumers that still read .size into unsigned long locals will now silently narrow on Windows where the field exceeds 4 GiB. Each of those is its own follow-up; the writer side is the prerequisite for ever putting a >4 GiB value in the field in the first place. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- diff.c | 5 +---- diffcore.h | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/diff.c b/diff.c index f0b4ffe512..de5ff1f7d0 100644 --- a/diff.c +++ b/diff.c @@ -4595,9 +4595,8 @@ int diff_populate_filespec(struct repository *r, } } else { - size_t size_st = 0; struct object_info info = { - .sizep = &size_st + .sizep = &s->size }; if (!(size_only || check_binary)) @@ -4619,7 +4618,6 @@ int diff_populate_filespec(struct repository *r, die("unable to read %s", oid_to_hex(&s->oid)); object_read: - s->size = cast_size_t_to_ulong(size_st); if (size_only || check_binary) { if (size_only) return 0; @@ -4634,7 +4632,6 @@ object_read: if (odb_read_object_info_extended(r->objects, &s->oid, &info, OBJECT_INFO_LOOKUP_REPLACE)) die("unable to read %s", oid_to_hex(&s->oid)); - s->size = cast_size_t_to_ulong(size_st); } s->should_free = 1; } diff --git a/diffcore.h b/diffcore.h index d75038d1b3..85fc94e2a5 100644 --- a/diffcore.h +++ b/diffcore.h @@ -54,7 +54,7 @@ struct diff_filespec { char *path; void *data; void *cnt_data; - unsigned long size; + size_t size; int count; /* Reference count */ int rename_used; /* Count of rename users */ unsigned short mode; /* file mode */