From 81e29064371c4b4599b171ac71e73d1e21475a63 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:43 -0400 Subject: [PATCH 1/5] pack-objects: plug leak in `read_stdin_packs()` The `read_stdin_packs()` function added originally via 339bce27f4f (builtin/pack-objects.c: add '--stdin-packs' option, 2021-02-22) declares a `rev_info` struct but neglects to call `release_revisions()` on it before returning, creating the potential for a leak. The related change in 97ec43247c0 (pack-objects: declare 'rev_info' for '--stdin-packs' earlier, 2025-06-23) carried forward this oversight and did not address it. Ensure that we call `release_revisions()` appropriately to prevent a potential leak from this function. Note that in practice our `rev_info` here does not have a present leak, hence t5331 passes cleanly before this commit, even when built with SANITIZE=leak. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index cd013c0b68..9a89bc5c4c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3968,6 +3968,8 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) show_object_pack_hint, &mode); + release_revisions(&revs); + trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", stdin_packs_found_nr); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", From d31d1f2e06942046b5220942c77245725d7df2c1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:46 -0400 Subject: [PATCH 2/5] pack-objects: refactor `read_packs_list_from_stdin()` to use `strmap` The '--stdin-packs' mode of pack-objects maintains two separate string_lists: one for included packs, and one for excluded packs. Each list stores the pack basename as a string and the corresponding `packed_git` pointer in its `->util` field. This works, but makes it awkward to extend the set of pack "kinds" that pack-objects can accept via stdin, since each new kind would need its own string_list and duplicated handling. A future commit will want to do just this, so prepare for that change by handling the various "kinds" of packs specified over stdin in a more generic fashion. Namely, replace the two `string_list`s with a single `strmap` keyed on the pack basename, with values pointing to a new `struct stdin_pack_info`. This struct tracks both the `packed_git` pointer and a `kind` bitfield indicating whether the pack was specified as included or excluded. Extract the logic for sorting packs by mtime and adding their objects into a separate `stdin_packs_add_pack_entries()` helper. While we could have used a `string_list`, we must handle the case where the same pack is specified more than once. With a `string_list` only, we would have to pay a quadratic cost to either (a) insert elements into their sorted positions, or (b) a repeated linear search, which is accidentally quadratic. For that reason, use a strmap instead. This patch does not include any functional changes. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 195 +++++++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 75 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 9a89bc5c4c..945100b405 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -28,6 +28,7 @@ #include "reachable.h" #include "oid-array.h" #include "strvec.h" +#include "strmap.h" #include "list.h" #include "packfile.h" #include "object-file.h" @@ -3820,87 +3821,61 @@ static void show_commit_pack_hint(struct commit *commit, void *data) } +/* + * stdin_pack_info_kind specifies how a pack specified over stdin + * should be treated when pack-objects is invoked with --stdin-packs. + * + * - STDIN_PACK_INCLUDE: objects in any packs with this flag bit set + * should be included in the output pack, unless they appear in an + * excluded pack. + * + * - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag + * bit set should be excluded from the output pack. + * + * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are + * used as traversal tips when invoked with --stdin-packs=follow. + */ +enum stdin_pack_info_kind { + STDIN_PACK_INCLUDE = (1<<0), + STDIN_PACK_EXCLUDE_CLOSED = (1<<1), +}; + +struct stdin_pack_info { + struct packed_git *p; + enum stdin_pack_info_kind kind; +}; + static int pack_mtime_cmp(const void *_a, const void *_b) { - struct packed_git *a = ((const struct string_list_item*)_a)->util; - struct packed_git *b = ((const struct string_list_item*)_b)->util; + struct stdin_pack_info *a = ((const struct string_list_item*)_a)->util; + struct stdin_pack_info *b = ((const struct string_list_item*)_b)->util; /* * order packs by descending mtime so that objects are laid out * roughly as newest-to-oldest */ - if (a->mtime < b->mtime) + if (a->p->mtime < b->p->mtime) return 1; - else if (b->mtime < a->mtime) + else if (b->p->mtime < a->p->mtime) return -1; else return 0; } -static void read_packs_list_from_stdin(struct rev_info *revs) +static void stdin_packs_add_pack_entries(struct strmap *packs, + struct rev_info *revs) { - struct strbuf buf = STRBUF_INIT; - struct string_list include_packs = STRING_LIST_INIT_DUP; - struct string_list exclude_packs = STRING_LIST_INIT_DUP; - struct string_list_item *item = NULL; - struct packed_git *p; + struct string_list keys = STRING_LIST_INIT_NODUP; + struct string_list_item *item; + struct hashmap_iter iter; + struct strmap_entry *entry; - while (strbuf_getline(&buf, stdin) != EOF) { - if (!buf.len) - continue; + strmap_for_each_entry(packs, &iter, entry) { + struct stdin_pack_info *info = entry->value; + if (!info->p) + die(_("could not find pack '%s'"), entry->key); - if (*buf.buf == '^') - string_list_append(&exclude_packs, buf.buf + 1); - else - string_list_append(&include_packs, buf.buf); - - strbuf_reset(&buf); - } - - string_list_sort_u(&include_packs, 0); - string_list_sort_u(&exclude_packs, 0); - - repo_for_each_pack(the_repository, p) { - const char *pack_name = pack_basename(p); - - if ((item = string_list_lookup(&include_packs, pack_name))) { - if (exclude_promisor_objects && p->pack_promisor) - die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name); - item->util = p; - } - if ((item = string_list_lookup(&exclude_packs, pack_name))) - item->util = p; - } - - /* - * Arguments we got on stdin may not even be packs. First - * check that to avoid segfaulting later on in - * e.g. pack_mtime_cmp(), excluded packs are handled below. - * - * Since we first parsed our STDIN and then sorted the input - * lines the pack we error on will be whatever line happens to - * sort first. This is lazy, it's enough that we report one - * bad case here, we don't need to report the first/last one, - * or all of them. - */ - for_each_string_list_item(item, &include_packs) { - struct packed_git *p = item->util; - if (!p) - die(_("could not find pack '%s'"), item->string); - if (!is_pack_valid(p)) - die(_("packfile %s cannot be accessed"), p->pack_name); - } - - /* - * Then, handle all of the excluded packs, marking them as - * kept in-core so that later calls to add_object_entry() - * discards any objects that are also found in excluded packs. - */ - for_each_string_list_item(item, &exclude_packs) { - struct packed_git *p = item->util; - if (!p) - die(_("could not find pack '%s'"), item->string); - p->pack_keep_in_core = 1; + string_list_append(&keys, entry->key)->util = info; } /* @@ -3908,19 +3883,89 @@ static void read_packs_list_from_stdin(struct rev_info *revs) * string_list_item's ->util pointer, which string_list_sort() does not * provide. */ - QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp); + QSORT(keys.items, keys.nr, pack_mtime_cmp); - for_each_string_list_item(item, &include_packs) { - struct packed_git *p = item->util; - for_each_object_in_pack(p, - add_object_entry_from_pack, - revs, - ODB_FOR_EACH_OBJECT_PACK_ORDER); + for_each_string_list_item(item, &keys) { + struct stdin_pack_info *info = item->util; + + if (info->kind & STDIN_PACK_INCLUDE) + for_each_object_in_pack(info->p, + add_object_entry_from_pack, + revs, + ODB_FOR_EACH_OBJECT_PACK_ORDER); } + string_list_clear(&keys, 0); +} + +static void stdin_packs_read_input(struct rev_info *revs) +{ + struct strbuf buf = STRBUF_INIT; + struct strmap packs = STRMAP_INIT; + struct packed_git *p; + + while (strbuf_getline(&buf, stdin) != EOF) { + struct stdin_pack_info *info; + enum stdin_pack_info_kind kind = STDIN_PACK_INCLUDE; + const char *key = buf.buf; + + if (!*key) + continue; + else if (*key == '^') + kind = STDIN_PACK_EXCLUDE_CLOSED; + + if (kind != STDIN_PACK_INCLUDE) + key++; + + info = strmap_get(&packs, key); + if (!info) { + CALLOC_ARRAY(info, 1); + strmap_put(&packs, key, info); + } + + info->kind |= kind; + + strbuf_reset(&buf); + } + + repo_for_each_pack(the_repository, p) { + struct stdin_pack_info *info; + + info = strmap_get(&packs, pack_basename(p)); + if (!info) + continue; + + if (info->kind & STDIN_PACK_INCLUDE) { + if (exclude_promisor_objects && p->pack_promisor) + die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name); + + /* + * Arguments we got on stdin may not even be + * packs. First check that to avoid segfaulting + * later on in e.g. pack_mtime_cmp(), excluded + * packs are handled below. + */ + if (!is_pack_valid(p)) + die(_("packfile %s cannot be accessed"), p->pack_name); + } + + if (info->kind & STDIN_PACK_EXCLUDE_CLOSED) { + /* + * Marking excluded packs as kept in-core so + * that later calls to add_object_entry() + * discards any objects that are also found in + * excluded packs. + */ + p->pack_keep_in_core = 1; + } + + info->p = p; + } + + stdin_packs_add_pack_entries(&packs, revs); + strbuf_release(&buf); - string_list_clear(&include_packs, 0); - string_list_clear(&exclude_packs, 0); + strmap_clear(&packs, 1); } static void add_unreachable_loose_objects(struct rev_info *revs); @@ -3957,7 +4002,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - read_packs_list_from_stdin(&revs); + stdin_packs_read_input(&revs); if (rev_list_unpacked) add_unreachable_loose_objects(&revs); From 5a4381f093508f0504a99b1abc9ca5e8ebd0901f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:49 -0400 Subject: [PATCH 3/5] t7704: demonstrate failure with once-cruft objects above the geometric split Add a test demonstrating a case where geometric repacking fails to produce a pack with full object closure, thus making it impossible to write a reachability bitmap. Mark the test with 'test_expect_failure' for now. The subsequent commit will explain the precise failure mode, and implement a fix. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t7704-repack-cruft.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index aa2e2e6ad8..77133395b5 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -869,4 +869,26 @@ test_expect_success 'repack --write-midx includes cruft when already geometric' ) ' +test_expect_failure 'repack rescues once-cruft objects above geometric split' ' + git config repack.midxMustContainCruft false && + + test_commit reachable && + test_commit unreachable && + + unreachable="$(git rev-parse HEAD)" && + + git reset --hard HEAD^ && + git tag -d unreachable && + git reflog expire --all --expire=all && + + git repack --cruft -d && + + echo $unreachable | git pack-objects .git/objects/pack/pack && + + test_commit new && + + git update-ref refs/heads/other $unreachable && + git repack --geometric=2 -d --write-midx --write-bitmap-index +' + test_done From 3f7c0e722e2733aede32b1e531caf83e7043d1bd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:51 -0400 Subject: [PATCH 4/5] pack-objects: support excluded-open packs with --stdin-packs In cd846bacc7d (pack-objects: introduce '--stdin-packs=follow', 2025-06-23), pack-objects learned to traverse through commits in included packs when using '--stdin-packs=follow', rescuing reachable objects from unlisted packs into the output. When we encounter a commit in an excluded pack during this rescuing phase we will traverse through its parents. But because we set `revs.no_kept_objects = 1`, commit simplification will prevent us from showing it via `get_revision()`. (In practice, `--stdin-packs=follow` walks commits down to the roots, but only opens up trees for ones that do not appear in an excluded pack.) But there are certain cases where we *do* need to see the parents of an object in an excluded pack. Namely, if an object is rescue-able, but only reachable from object(s) which appear in excluded packs, then commit simplification will exclude those commits from the object traversal, and we will never see a copy of that object, and thus not rescue it. This is what causes the failure in the previous commit during repacking. When performing a geometric repack, packs above the geometric split that weren't part of the previous MIDX (e.g., packs pushed directly into `$GIT_DIR/objects/pack`) may not have full object closure. When those packs are listed as excluded via the '^' marker, the reachability traversal encounters the sequence described above, and may miss objects which we expect to rescue with `--stdin-packs=follow`. Introduce a new "excluded-open" pack prefix, '!'. Like '^'-prefixed packs, objects from '!'-prefixed packs are excluded from the resulting pack. But unlike '^', commits in '!'-prefixed packs *are* used as starting points for the follow traversal, and the traversal does not treat them as a closure boundary. In order to distinguish excluded-closed from excluded-open packs during the traversal, introduce a new `pack_keep_in_core_open` bit on `struct packed_git`, along with a corresponding `KEPT_PACK_IN_CORE_OPEN` flag for the kept-pack cache. In `add_object_entry_from_pack()`, move the `want_object_in_pack()` check to *after* `add_pending_oid()`. This is necessary so that commits from excluded-open packs are added as traversal tips even though their objects won't appear in the output. As a consequence, the caller `for_each_object_in_pack()` will always provide a non-NULL 'p', hence we are able to drop the "if (p)" conditional. The `include_check` and `include_check_obj` callbacks on `rev_info` are used to halt the walk at closed-excluded packs, since objects behind a '^' boundary are guaranteed to have closure and need not be rescued. The following commit will make use of this new functionality within the repack layer to resolve the test failure demonstrated in the previous commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.adoc | 25 ++++-- builtin/pack-objects.c | 116 ++++++++++++++++++++++------ packfile.c | 3 +- packfile.h | 2 + t/t5331-pack-objects-stdin.sh | 105 +++++++++++++++++++++++++ 5 files changed, 218 insertions(+), 33 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index 71b9682485..b78175fbe1 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -94,13 +94,24 @@ base-name:: included packs (those not beginning with `^`), excluding any objects listed in the excluded packs (beginning with `^`). + -When `mode` is "follow", objects from packs not listed on stdin receive -special treatment. Objects within unlisted packs will be included if -those objects are (1) reachable from the included packs, and (2) not -found in any excluded packs. This mode is useful, for example, to -resurrect once-unreachable objects found in cruft packs to generate -packs which are closed under reachability up to the boundary set by the -excluded packs. +When `mode` is "follow" packs may additionally be prefixed with `!`, +indicating that they are excluded but not necessarily closed under +reachability. In addition to objects in included packs, the resulting +pack may include additional objects based on the following: ++ +-- +* If any packs are marked with `!`, then objects reachable from such + packs or included ones via objects outside of excluded-closed packs + will be included. In this case, all `^` packs are treated as closed + under reachability. +* Otherwise (if there are no `!` packs), objects within unlisted packs + will be included if those objects are (1) reachable from the + included packs, and (2) not found in any excluded packs. +-- ++ +This mode is useful, for example, to resurrect once-unreachable +objects found in cruft packs to generate packs which are closed under +reachability up to the boundary set by the excluded packs. + Incompatible with `--revs`, or options that imply `--revs` (such as `--all`), with the exception of `--unpacked`, which is compatible. diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 945100b405..7b97784d6c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -217,6 +217,7 @@ static int have_non_local_packs; static int incremental; static int ignore_packed_keep_on_disk; static int ignore_packed_keep_in_core; +static int ignore_packed_keep_in_core_open; static int ignore_packed_keep_in_core_has_cruft; static int allow_ofs_delta; static struct pack_idx_option pack_idx_opts; @@ -1618,7 +1619,8 @@ static int want_found_object(const struct object_id *oid, int exclude, /* * Then handle .keep first, as we have a fast(er) path there. */ - if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) { + if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core || + ignore_packed_keep_in_core_open) { /* * Set the flags for the kept-pack cache to be the ones we want * to ignore. @@ -1632,6 +1634,8 @@ static int want_found_object(const struct object_id *oid, int exclude, flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) flags |= KEPT_PACK_IN_CORE; + if (ignore_packed_keep_in_core_open) + flags |= KEPT_PACK_IN_CORE_OPEN; /* * If the object is in a pack that we want to ignore, *and* we @@ -1643,6 +1647,8 @@ static int want_found_object(const struct object_id *oid, int exclude, return 0; if (ignore_packed_keep_in_core && p->pack_keep_in_core) return 0; + if (ignore_packed_keep_in_core_open && p->pack_keep_in_core_open) + return 0; if (has_object_kept_pack(p->repo, oid, flags)) return 0; } else { @@ -3742,6 +3748,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, void *_data) { off_t ofs; + struct object_info oi = OBJECT_INFO_INIT; enum object_type type = OBJ_NONE; display_progress(progress_state, ++nr_seen); @@ -3749,29 +3756,34 @@ static int add_object_entry_from_pack(const struct object_id *oid, if (have_duplicate_entry(oid, 0)) return 0; + stdin_packs_found_nr++; + ofs = nth_packed_object_offset(p, pos); + + oi.typep = &type; + if (packed_object_info(p, ofs, &oi) < 0) { + die(_("could not get type of object %s in pack %s"), + oid_to_hex(oid), p->pack_name); + } else if (type == OBJ_COMMIT) { + struct rev_info *revs = _data; + /* + * commits in included packs are used as starting points + * for the subsequent revision walk + * + * Note that we do want to walk through commits that are + * present in excluded-open ('!') packs to pick up any + * objects reachable from them not present in the + * excluded-closed ('^') packs. + * + * However, we'll only add those objects to the packing + * list after checking `want_object_in_pack()` below. + */ + add_pending_oid(revs, NULL, oid, 0); + } + if (!want_object_in_pack(oid, 0, &p, &ofs)) return 0; - if (p) { - struct object_info oi = OBJECT_INFO_INIT; - - oi.typep = &type; - if (packed_object_info(p, ofs, &oi) < 0) { - die(_("could not get type of object %s in pack %s"), - oid_to_hex(oid), p->pack_name); - } else if (type == OBJ_COMMIT) { - struct rev_info *revs = _data; - /* - * commits in included packs are used as starting points for the - * subsequent revision walk - */ - add_pending_oid(revs, NULL, oid, 0); - } - - stdin_packs_found_nr++; - } - create_object_entry(oid, type, 0, 0, 0, p, ofs); return 0; @@ -3832,12 +3844,18 @@ static void show_commit_pack_hint(struct commit *commit, void *data) * - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag * bit set should be excluded from the output pack. * - * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are - * used as traversal tips when invoked with --stdin-packs=follow. + * - STDIN_PACK_EXCLUDE_OPEN: objects in any packs with this flag + * bit set should be excluded from the output pack, but are not + * guaranteed to be closed under reachability. + * + * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE or + * STDIN_PACK_EXCLUDE_OPEN are used as traversal tips when invoked + * with --stdin-packs=follow. */ enum stdin_pack_info_kind { STDIN_PACK_INCLUDE = (1<<0), STDIN_PACK_EXCLUDE_CLOSED = (1<<1), + STDIN_PACK_EXCLUDE_OPEN = (1<<2), }; struct stdin_pack_info { @@ -3862,6 +3880,17 @@ static int pack_mtime_cmp(const void *_a, const void *_b) return 0; } +static int stdin_packs_include_check_obj(struct object *obj, void *data UNUSED) +{ + return !has_object_kept_pack(to_pack.repo, &obj->oid, + KEPT_PACK_IN_CORE); +} + +static int stdin_packs_include_check(struct commit *commit, void *data) +{ + return stdin_packs_include_check_obj((struct object *)commit, data); +} + static void stdin_packs_add_pack_entries(struct strmap *packs, struct rev_info *revs) { @@ -3888,7 +3917,19 @@ static void stdin_packs_add_pack_entries(struct strmap *packs, for_each_string_list_item(item, &keys) { struct stdin_pack_info *info = item->util; - if (info->kind & STDIN_PACK_INCLUDE) + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * When open-excluded packs ("!") are present, stop + * the parent walk at closed-excluded ("^") packs. + * Objects behind a "^" boundary are guaranteed to + * have closure and should not be rescued. + */ + revs->include_check = stdin_packs_include_check; + revs->include_check_obj = stdin_packs_include_check_obj; + } + + if ((info->kind & STDIN_PACK_INCLUDE) || + (info->kind & STDIN_PACK_EXCLUDE_OPEN)) for_each_object_in_pack(info->p, add_object_entry_from_pack, revs, @@ -3898,7 +3939,8 @@ static void stdin_packs_add_pack_entries(struct strmap *packs, string_list_clear(&keys, 0); } -static void stdin_packs_read_input(struct rev_info *revs) +static void stdin_packs_read_input(struct rev_info *revs, + enum stdin_packs_mode mode) { struct strbuf buf = STRBUF_INIT; struct strmap packs = STRMAP_INIT; @@ -3913,6 +3955,8 @@ static void stdin_packs_read_input(struct rev_info *revs) continue; else if (*key == '^') kind = STDIN_PACK_EXCLUDE_CLOSED; + else if (*key == '!' && mode == STDIN_PACKS_MODE_FOLLOW) + kind = STDIN_PACK_EXCLUDE_OPEN; if (kind != STDIN_PACK_INCLUDE) key++; @@ -3959,6 +4003,20 @@ static void stdin_packs_read_input(struct rev_info *revs) p->pack_keep_in_core = 1; } + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * Marking excluded open packs as kept in-core + * (open) for the same reason as we marked + * exclude closed packs as kept in-core. + * + * Use a separate flag here to ensure we don't + * halt our traversal at these packs, since they + * are not guaranteed to have closure. + * + */ + p->pack_keep_in_core_open = 1; + } + info->p = p; } @@ -4002,7 +4060,15 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - stdin_packs_read_input(&revs); + if (mode == STDIN_PACKS_MODE_FOLLOW) { + /* + * In '--stdin-packs=follow' mode, additionally ignore + * objects in excluded-open packs to prevent them from + * appearing in the resulting pack. + */ + ignore_packed_keep_in_core_open = 1; + } + stdin_packs_read_input(&revs, mode); if (rev_list_unpacked) add_unreachable_loose_objects(&revs); diff --git a/packfile.c b/packfile.c index 215a23e42b..076e444e32 100644 --- a/packfile.c +++ b/packfile.c @@ -2246,7 +2246,8 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st struct packed_git *p = e->pack; if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || - (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE)) || + (p->pack_keep_in_core_open && (flags & KEPT_PACK_IN_CORE_OPEN))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } diff --git a/packfile.h b/packfile.h index 8b04a258a7..b7735c1977 100644 --- a/packfile.h +++ b/packfile.h @@ -28,6 +28,7 @@ struct packed_git { unsigned pack_local:1, pack_keep:1, pack_keep_in_core:1, + pack_keep_in_core_open:1, freshened:1, do_not_close:1, pack_promisor:1, @@ -266,6 +267,7 @@ int packfile_store_freshen_object(struct packfile_store *store, enum kept_pack_type { KEPT_PACK_ON_DISK = (1 << 0), KEPT_PACK_IN_CORE = (1 << 1), + KEPT_PACK_IN_CORE_OPEN = (1 << 2), }; /* diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 7eb79bc2cd..c74b5861af 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -415,4 +415,109 @@ test_expect_success '--stdin-packs=follow tolerates missing commits' ' stdin_packs__follow_with_only HEAD HEAD^{tree} ' +test_expect_success '--stdin-packs=follow with open-excluded packs' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + git config set maintenance.auto false && + + git branch -M main && + + # Create the following commit structure: + # + # A <-- B <-- D (main) + # ^ + # \ + # C (other) + test_commit A && + test_commit B && + git checkout -B other && + test_commit C && + git checkout main && + test_commit D && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + D="$(echo B..D | git pack-objects --revs $packdir/pack)" && + + C_ONLY="$(git rev-parse other | git pack-objects $packdir/pack)" && + + git prune-packed && + + # Create a pack using --stdin-packs=follow where: + # + # - pack D is included, + # - pack C_ONLY is excluded, but open, + # - pack B is excluded, but closed, and + # - packs A and C are unknown + # + # The resulting pack should therefore contain: + # + # - objects from the included pack D, + # - A.t (rescued via D^{tree}), and + # - C^{tree} and C.t (rescued via pack C_ONLY) + # + # , but should omit: + # + # - C (excluded via C_ONLY), + # - objects from pack B (trivially excluded-closed) + # - A and A^{tree} (ancestors of B) + P=$(git pack-objects --stdin-packs=follow $packdir/pack <<-EOF + pack-$D.pack + !pack-$C_ONLY.pack + ^pack-$B.pack + EOF + ) && + + { + objects_in_packs $D && + git rev-parse A:A.t "C^{tree}" C:C.t + } >expect.raw && + sort expect.raw >expect && + + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + +test_expect_success '--stdin-packs with !-delimited pack without follow' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + test_commit A && + test_commit B && + test_commit C && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + + cat >in <<-EOF && + !pack-$A.pack + pack-$B.pack + pack-$C.pack + EOF + + # Without --stdin-packs=follow, we treat the first + # line of input as a literal packfile name, and thus + # expect pack-objects to complain of a missing pack + test_must_fail git pack-objects --stdin-packs --stdout \ + >/dev/null err && + test_grep "could not find pack .!pack-$A.pack." err && + + # With --stdin-packs=follow, we treat the second line + # of input as indicating pack-$A.pack is an excluded + # open pack, and thus expect pack-objects to succeed + P=$(git pack-objects --stdin-packs=follow $packdir/pack expect && + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + test_done From 9ad29df36d7c762677b5a4ecc6a6dc229c818b2a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:54 -0400 Subject: [PATCH 5/5] repack: mark non-MIDX packs above the split as excluded-open In 5ee86c273bf (repack: exclude cruft pack(s) from the MIDX where possible, 2025-06-23), geometric repacking learned to exclude cruft packs from the MIDX when 'repack.midxMustContainCruft' is set to 'false'. This works because packs generated with '--stdin-packs=follow' rescue any once-unreachable objects that later become reachable, making the resulting packs closed under reachability without needing the cruft pack in the MIDX. However, packs above the geometric split that were not part of the previous MIDX may not have full object closure. When such packs are marked as excluded-closed ('^'), pack-objects treats them as a reachability boundary and does not traverse through them during the follow pass, potentially leaving the resulting pack without full closure. Fix this by marking packs above the geometric split that were not in the previous MIDX as excluded-open ('!') instead of excluded-closed ('^'). This causes pack-objects to walk through their commits during the follow pass, rescuing any reachable objects not present in the closed-excluded packs. Note that MIDXs which were generated prior to this change and are unlucky enough to not be closed under reachability may still exhibit this bug, as we treat all MIDX'd packs as closed. That is true in an overwhelming number of cases, since in order to have a non-closed MIDX you would have to: - Generate a pack via an earlier geometric repack that is not closed under reachability. - Store that pack in the MIDX. - Avoid picking any commits to receive reachability bitmaps which happen to reach objects from which the missing objects are reachable. In the extremely rare chance that all of the above should happen, an all-into-one repack will resolve the issue. Unfortunately, there is no perfect way to determine whether a MIDX'd pack is closed outside of ensuring that there is a '1' bit in at least one bitmap for every bit position corresponding to objects in that pack. While this is possible to do, this approach would treat MIDX'd packs as open in cases where there is at least one object that is not reachable from the subset of commits selected for bitmapping. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 19 +++++++++++++++++-- t/t7704-repack-cruft.sh | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f6bb04bef7..4c5a82c2c8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -369,8 +369,23 @@ int cmd_repack(int argc, */ for (i = 0; i < geometry.split; i++) fprintf(in, "%s\n", pack_basename(geometry.pack[i])); - for (i = geometry.split; i < geometry.pack_nr; i++) - fprintf(in, "^%s\n", pack_basename(geometry.pack[i])); + for (i = geometry.split; i < geometry.pack_nr; i++) { + const char *basename = pack_basename(geometry.pack[i]); + char marker = '^'; + + if (!midx_must_contain_cruft && + !string_list_has_string(&existing.midx_packs, + basename)) { + /* + * Assume non-MIDX'd packs are not + * necessarily closed under + * reachability. + */ + marker = '!'; + } + + fprintf(in, "%c%s\n", marker, basename); + } fclose(in); } diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index 77133395b5..9e03b04315 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -869,7 +869,7 @@ test_expect_success 'repack --write-midx includes cruft when already geometric' ) ' -test_expect_failure 'repack rescues once-cruft objects above geometric split' ' +test_expect_success 'repack rescues once-cruft objects above geometric split' ' git config repack.midxMustContainCruft false && test_commit reachable &&