Merge branch 'tb/stdin-packs-excluded-but-open' into next

pack-objects's --stdin-packs=follow mode learns to handle
excluded-but-open packs.

* tb/stdin-packs-excluded-but-open:
  repack: mark non-MIDX packs above the split as excluded-open
  pack-objects: support excluded-open packs with --stdin-packs
  t7704: demonstrate failure with once-cruft objects above the geometric split
  pack-objects: refactor `read_packs_list_from_stdin()` to use `strmap`
  pack-objects: plug leak in `read_stdin_packs()`
This commit is contained in:
Junio C Hamano
2026-03-31 13:00:54 -07:00
7 changed files with 372 additions and 103 deletions

View File

@@ -94,13 +94,24 @@ base-name::
included packs (those not beginning with `^`), excluding any
objects listed in the excluded packs (beginning with `^`).
+
When `mode` is "follow", objects from packs not listed on stdin receive
special treatment. Objects within unlisted packs will be included if
those objects are (1) reachable from the included packs, and (2) not
found in any excluded packs. This mode is useful, for example, to
resurrect once-unreachable objects found in cruft packs to generate
packs which are closed under reachability up to the boundary set by the
excluded packs.
When `mode` is "follow", packs may additionally be prefixed with `!`,
indicating that they are excluded but not necessarily closed under
reachability. In addition to objects in included packs, the resulting
pack may include additional objects based on the following:
+
--
* If any packs are marked with `!`, then objects reachable from such
packs or included ones via objects outside of excluded-closed packs
will be included. In this case, all `^` packs are treated as closed
under reachability.
* Otherwise (if there are no `!` packs), objects within unlisted packs
will be included if those objects are (1) reachable from the
included packs, and (2) not found in any excluded packs.
--
+
This mode is useful, for example, to resurrect once-unreachable
objects found in cruft packs to generate packs which are closed under
reachability up to the boundary set by the excluded packs.
+
Incompatible with `--revs`, or options that imply `--revs` (such as
`--all`), with the exception of `--unpacked`, which is compatible.

View File

@@ -28,6 +28,7 @@
#include "reachable.h"
#include "oid-array.h"
#include "strvec.h"
#include "strmap.h"
#include "list.h"
#include "packfile.h"
#include "object-file.h"
@@ -217,6 +218,7 @@ static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
static int ignore_packed_keep_in_core_open;
static int ignore_packed_keep_in_core_has_cruft;
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
@@ -1632,7 +1634,8 @@ static int want_found_object(const struct object_id *oid, int exclude,
/*
* Then handle .keep first, as we have a fast(er) path there.
*/
if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core ||
ignore_packed_keep_in_core_open) {
/*
* Set the flags for the kept-pack cache to be the ones we want
* to ignore.
@@ -1646,6 +1649,8 @@ static int want_found_object(const struct object_id *oid, int exclude,
flags |= KEPT_PACK_ON_DISK;
if (ignore_packed_keep_in_core)
flags |= KEPT_PACK_IN_CORE;
if (ignore_packed_keep_in_core_open)
flags |= KEPT_PACK_IN_CORE_OPEN;
/*
* If the object is in a pack that we want to ignore, *and* we
@@ -1657,6 +1662,8 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
if (ignore_packed_keep_in_core_open && p->pack_keep_in_core_open)
return 0;
if (has_object_kept_pack(p->repo, oid, flags))
return 0;
} else {
@@ -3756,6 +3763,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
void *_data)
{
off_t ofs;
struct object_info oi = OBJECT_INFO_INIT;
enum object_type type = OBJ_NONE;
display_progress(progress_state, ++nr_seen);
@@ -3763,29 +3771,34 @@ static int add_object_entry_from_pack(const struct object_id *oid,
if (have_duplicate_entry(oid, 0))
return 0;
stdin_packs_found_nr++;
ofs = nth_packed_object_offset(p, pos);
oi.typep = &type;
if (packed_object_info(p, ofs, &oi) < 0) {
die(_("could not get type of object %s in pack %s"),
oid_to_hex(oid), p->pack_name);
} else if (type == OBJ_COMMIT) {
struct rev_info *revs = _data;
/*
* commits in included packs are used as starting points
* for the subsequent revision walk
*
* Note that we do want to walk through commits that are
* present in excluded-open ('!') packs to pick up any
* objects reachable from them not present in the
* excluded-closed ('^') packs.
*
* However, we'll only add those objects to the packing
* list after checking `want_object_in_pack()` below.
*/
add_pending_oid(revs, NULL, oid, 0);
}
if (!want_object_in_pack(oid, 0, &p, &ofs))
return 0;
if (p) {
struct object_info oi = OBJECT_INFO_INIT;
oi.typep = &type;
if (packed_object_info(p, ofs, &oi) < 0) {
die(_("could not get type of object %s in pack %s"),
oid_to_hex(oid), p->pack_name);
} else if (type == OBJ_COMMIT) {
struct rev_info *revs = _data;
/*
* commits in included packs are used as starting points for the
* subsequent revision walk
*/
add_pending_oid(revs, NULL, oid, 0);
}
stdin_packs_found_nr++;
}
create_object_entry(oid, type, 0, 0, 0, p, ofs);
return 0;
@@ -3835,87 +3848,78 @@ static void show_commit_pack_hint(struct commit *commit, void *data)
}
/*
* stdin_pack_info_kind specifies how a pack specified over stdin
* should be treated when pack-objects is invoked with --stdin-packs.
*
* - STDIN_PACK_INCLUDE: objects in any packs with this flag bit set
* should be included in the output pack, unless they appear in an
* excluded pack.
*
* - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag
* bit set should be excluded from the output pack.
*
* - STDIN_PACK_EXCLUDE_OPEN: objects in any packs with this flag
* bit set should be excluded from the output pack, but are not
* guaranteed to be closed under reachability.
*
* Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE or
* STDIN_PACK_EXCLUDE_OPEN are used as traversal tips when invoked
* with --stdin-packs=follow.
*/
enum stdin_pack_info_kind {
STDIN_PACK_INCLUDE = (1<<0),
STDIN_PACK_EXCLUDE_CLOSED = (1<<1),
STDIN_PACK_EXCLUDE_OPEN = (1<<2),
};
struct stdin_pack_info {
struct packed_git *p;
enum stdin_pack_info_kind kind;
};
static int pack_mtime_cmp(const void *_a, const void *_b)
{
struct packed_git *a = ((const struct string_list_item*)_a)->util;
struct packed_git *b = ((const struct string_list_item*)_b)->util;
struct stdin_pack_info *a = ((const struct string_list_item*)_a)->util;
struct stdin_pack_info *b = ((const struct string_list_item*)_b)->util;
/*
* order packs by descending mtime so that objects are laid out
* roughly as newest-to-oldest
*/
if (a->mtime < b->mtime)
if (a->p->mtime < b->p->mtime)
return 1;
else if (b->mtime < a->mtime)
else if (b->p->mtime < a->p->mtime)
return -1;
else
return 0;
}
static void read_packs_list_from_stdin(struct rev_info *revs)
static int stdin_packs_include_check_obj(struct object *obj, void *data UNUSED)
{
struct strbuf buf = STRBUF_INIT;
struct string_list include_packs = STRING_LIST_INIT_DUP;
struct string_list exclude_packs = STRING_LIST_INIT_DUP;
struct string_list_item *item = NULL;
struct packed_git *p;
return !has_object_kept_pack(to_pack.repo, &obj->oid,
KEPT_PACK_IN_CORE);
}
while (strbuf_getline(&buf, stdin) != EOF) {
if (!buf.len)
continue;
static int stdin_packs_include_check(struct commit *commit, void *data)
{
return stdin_packs_include_check_obj((struct object *)commit, data);
}
if (*buf.buf == '^')
string_list_append(&exclude_packs, buf.buf + 1);
else
string_list_append(&include_packs, buf.buf);
static void stdin_packs_add_pack_entries(struct strmap *packs,
struct rev_info *revs)
{
struct string_list keys = STRING_LIST_INIT_NODUP;
struct string_list_item *item;
struct hashmap_iter iter;
struct strmap_entry *entry;
strbuf_reset(&buf);
}
strmap_for_each_entry(packs, &iter, entry) {
struct stdin_pack_info *info = entry->value;
if (!info->p)
die(_("could not find pack '%s'"), entry->key);
string_list_sort_u(&include_packs, 0);
string_list_sort_u(&exclude_packs, 0);
repo_for_each_pack(the_repository, p) {
const char *pack_name = pack_basename(p);
if ((item = string_list_lookup(&include_packs, pack_name))) {
if (exclude_promisor_objects && p->pack_promisor)
die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name);
item->util = p;
}
if ((item = string_list_lookup(&exclude_packs, pack_name)))
item->util = p;
}
/*
* Arguments we got on stdin may not even be packs. First
* check that to avoid segfaulting later on in
* e.g. pack_mtime_cmp(), excluded packs are handled below.
*
* Since we first parsed our STDIN and then sorted the input
* lines the pack we error on will be whatever line happens to
* sort first. This is lazy, it's enough that we report one
* bad case here, we don't need to report the first/last one,
* or all of them.
*/
for_each_string_list_item(item, &include_packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
if (!is_pack_valid(p))
die(_("packfile %s cannot be accessed"), p->pack_name);
}
/*
* Then, handle all of the excluded packs, marking them as
* kept in-core so that later calls to add_object_entry()
* discards any objects that are also found in excluded packs.
*/
for_each_string_list_item(item, &exclude_packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
p->pack_keep_in_core = 1;
string_list_append(&keys, entry->key)->util = info;
}
/*
@@ -3923,19 +3927,118 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
* string_list_item's ->util pointer, which string_list_sort() does not
* provide.
*/
QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
QSORT(keys.items, keys.nr, pack_mtime_cmp);
for_each_string_list_item(item, &include_packs) {
struct packed_git *p = item->util;
for_each_object_in_pack(p,
add_object_entry_from_pack,
revs,
ODB_FOR_EACH_OBJECT_PACK_ORDER);
for_each_string_list_item(item, &keys) {
struct stdin_pack_info *info = item->util;
if (info->kind & STDIN_PACK_EXCLUDE_OPEN) {
/*
* When open-excluded packs ("!") are present, stop
* the parent walk at closed-excluded ("^") packs.
* Objects behind a "^" boundary are guaranteed to
* have closure and should not be rescued.
*/
revs->include_check = stdin_packs_include_check;
revs->include_check_obj = stdin_packs_include_check_obj;
}
if ((info->kind & STDIN_PACK_INCLUDE) ||
(info->kind & STDIN_PACK_EXCLUDE_OPEN))
for_each_object_in_pack(info->p,
add_object_entry_from_pack,
revs,
ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
string_list_clear(&keys, 0);
}
static void stdin_packs_read_input(struct rev_info *revs,
enum stdin_packs_mode mode)
{
struct strbuf buf = STRBUF_INIT;
struct strmap packs = STRMAP_INIT;
struct packed_git *p;
while (strbuf_getline(&buf, stdin) != EOF) {
struct stdin_pack_info *info;
enum stdin_pack_info_kind kind = STDIN_PACK_INCLUDE;
const char *key = buf.buf;
if (!*key)
continue;
else if (*key == '^')
kind = STDIN_PACK_EXCLUDE_CLOSED;
else if (*key == '!' && mode == STDIN_PACKS_MODE_FOLLOW)
kind = STDIN_PACK_EXCLUDE_OPEN;
if (kind != STDIN_PACK_INCLUDE)
key++;
info = strmap_get(&packs, key);
if (!info) {
CALLOC_ARRAY(info, 1);
strmap_put(&packs, key, info);
}
info->kind |= kind;
strbuf_reset(&buf);
}
repo_for_each_pack(the_repository, p) {
struct stdin_pack_info *info;
info = strmap_get(&packs, pack_basename(p));
if (!info)
continue;
if (info->kind & STDIN_PACK_INCLUDE) {
if (exclude_promisor_objects && p->pack_promisor)
die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name);
/*
* Arguments we got on stdin may not even be
* packs. Check that first to avoid segfaulting
* later on in e.g. pack_mtime_cmp(); excluded
* packs are handled below.
*/
if (!is_pack_valid(p))
die(_("packfile %s cannot be accessed"), p->pack_name);
}
if (info->kind & STDIN_PACK_EXCLUDE_CLOSED) {
/*
* Mark excluded packs as kept in-core so
* that later calls to add_object_entry()
* discard any objects that are also found in
* excluded packs.
*/
p->pack_keep_in_core = 1;
}
if (info->kind & STDIN_PACK_EXCLUDE_OPEN) {
/*
* Mark excluded-open packs as kept in-core
* (open) for the same reason that we mark
* excluded-closed packs as kept in-core.
*
* Use a separate flag here to ensure we don't
* halt our traversal at these packs, since they
* are not guaranteed to have closure.
*/
p->pack_keep_in_core_open = 1;
}
info->p = p;
}
stdin_packs_add_pack_entries(&packs, revs);
strbuf_release(&buf);
string_list_clear(&include_packs, 0);
string_list_clear(&exclude_packs, 0);
strmap_clear(&packs, 1);
}
static void add_unreachable_loose_objects(struct rev_info *revs);
@@ -3972,7 +4075,15 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
/* avoids adding objects in excluded packs */
ignore_packed_keep_in_core = 1;
read_packs_list_from_stdin(&revs);
if (mode == STDIN_PACKS_MODE_FOLLOW) {
/*
* In '--stdin-packs=follow' mode, additionally ignore
* objects in excluded-open packs to prevent them from
* appearing in the resulting pack.
*/
ignore_packed_keep_in_core_open = 1;
}
stdin_packs_read_input(&revs, mode);
if (rev_list_unpacked)
add_unreachable_loose_objects(&revs);
@@ -3983,6 +4094,8 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
show_object_pack_hint,
&mode);
release_revisions(&revs);
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
stdin_packs_found_nr);
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",

View File

@@ -369,8 +369,23 @@ int cmd_repack(int argc,
*/
for (i = 0; i < geometry.split; i++)
fprintf(in, "%s\n", pack_basename(geometry.pack[i]));
for (i = geometry.split; i < geometry.pack_nr; i++)
fprintf(in, "^%s\n", pack_basename(geometry.pack[i]));
for (i = geometry.split; i < geometry.pack_nr; i++) {
const char *basename = pack_basename(geometry.pack[i]);
char marker = '^';
if (!midx_must_contain_cruft &&
!string_list_has_string(&existing.midx_packs,
basename)) {
/*
* Assume non-MIDX'd packs are not
* necessarily closed under
* reachability.
*/
marker = '!';
}
fprintf(in, "%c%s\n", marker, basename);
}
fclose(in);
}

View File

@@ -2244,7 +2244,8 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st
struct packed_git *p = e->pack;
if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) ||
(p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) {
(p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE)) ||
(p->pack_keep_in_core_open && (flags & KEPT_PACK_IN_CORE_OPEN))) {
ALLOC_GROW(packs, nr + 1, alloc);
packs[nr++] = p;
}

View File

@@ -28,6 +28,7 @@ struct packed_git {
unsigned pack_local:1,
pack_keep:1,
pack_keep_in_core:1,
pack_keep_in_core_open:1,
freshened:1,
do_not_close:1,
pack_promisor:1,
@@ -266,6 +267,7 @@ int packfile_store_freshen_object(struct packfile_store *store,
enum kept_pack_type {
KEPT_PACK_ON_DISK = (1 << 0),
KEPT_PACK_IN_CORE = (1 << 1),
KEPT_PACK_IN_CORE_OPEN = (1 << 2),
};
/*

View File

@@ -415,4 +415,109 @@ test_expect_success '--stdin-packs=follow tolerates missing commits' '
stdin_packs__follow_with_only HEAD HEAD^{tree}
'
# Exercise --stdin-packs=follow with all three kinds of input line at once:
# an included pack, an excluded-open ("!") pack and an excluded-closed
# ("^") pack, plus packs that are not mentioned on stdin at all.
test_expect_success '--stdin-packs=follow with open-excluded packs' '
test_when_finished "rm -fr repo" &&
git init repo &&
(
cd repo &&
git config set maintenance.auto false &&
git branch -M main &&
# Create the following commit structure:
#
# A <-- B <-- D (main)
# ^
# \
# C (other)
test_commit A &&
test_commit B &&
git checkout -B other &&
test_commit C &&
git checkout main &&
test_commit D &&
# Write one pack per commit range. C_ONLY is built without
# --revs from the single object name of the "other" tip, so
# (per the expectations below) it holds commit C but not its
# tree or blob.
A="$(echo A | git pack-objects --revs $packdir/pack)" &&
B="$(echo A..B | git pack-objects --revs $packdir/pack)" &&
C="$(echo B..C | git pack-objects --revs $packdir/pack)" &&
D="$(echo B..D | git pack-objects --revs $packdir/pack)" &&
C_ONLY="$(git rev-parse other | git pack-objects $packdir/pack)" &&
# Drop loose copies so that the objects are only found in packs.
git prune-packed &&
# Create a pack using --stdin-packs=follow where:
#
# - pack D is included,
# - pack C_ONLY is excluded, but open,
# - pack B is excluded, but closed, and
# - packs A and C are unknown
#
# The resulting pack should therefore contain:
#
# - objects from the included pack D,
# - A.t (rescued via D^{tree}), and
# - C^{tree} and C.t (rescued via pack C_ONLY)
#
# , but should omit:
#
# - C (excluded via C_ONLY),
# - objects from pack B (trivially excluded-closed)
# - A and A^{tree} (ancestors of B)
P=$(git pack-objects --stdin-packs=follow $packdir/pack <<-EOF
pack-$D.pack
!pack-$C_ONLY.pack
^pack-$B.pack
EOF
) &&
# Build the expected object list: everything in pack D plus the
# three rescued objects, sorted before comparing with the actual
# contents of the new pack.
{
objects_in_packs $D &&
git rev-parse A:A.t "C^{tree}" C:C.t
} >expect.raw &&
sort expect.raw >expect &&
objects_in_packs $P >actual &&
test_cmp expect actual
)
'
# Verify that the "!" prefix is only special under --stdin-packs=follow:
# in plain --stdin-packs mode such a line is taken as a literal pack name.
test_expect_success '--stdin-packs with !-delimited pack without follow' '
	test_when_finished "rm -fr repo" &&

	git init repo &&
	(
		# Work inside the freshly created repository so that the
		# commits and packs below do not leak into the enclosing one.
		cd repo &&

		test_commit A &&
		test_commit B &&
		test_commit C &&

		A="$(echo A | git pack-objects --revs $packdir/pack)" &&
		B="$(echo A..B | git pack-objects --revs $packdir/pack)" &&
		C="$(echo B..C | git pack-objects --revs $packdir/pack)" &&

		cat >in <<-EOF &&
		!pack-$A.pack
		pack-$B.pack
		pack-$C.pack
		EOF

		# Without --stdin-packs=follow, we treat the first
		# line of input as a literal packfile name, and thus
		# expect pack-objects to complain of a missing pack
		test_must_fail git pack-objects --stdin-packs --stdout \
			>/dev/null <in 2>err &&
		test_grep "could not find pack .!pack-$A.pack." err &&

		# With --stdin-packs=follow, we treat the first line
		# of input as indicating pack-$A.pack is an excluded
		# open pack, and thus expect pack-objects to succeed
		P=$(git pack-objects --stdin-packs=follow $packdir/pack <in) &&

		objects_in_packs $B $C >expect &&
		objects_in_packs $P >actual &&
		test_cmp expect actual
	)
'
test_done

View File

@@ -869,4 +869,26 @@ test_expect_success 'repack --write-midx includes cruft when already geometric'
)
'
# Regression scenario: a commit that was once unreachable also exists in a
# standalone pack and later becomes reachable again. The final geometric
# repack with --write-midx and --write-bitmap-index is expected to succeed.
test_expect_success 'repack rescues once-cruft objects above geometric split' '
git config repack.midxMustContainCruft false &&
test_commit reachable &&
test_commit unreachable &&
# Remember the second commit, then drop every reference to it
# (branch tip, tag and reflog entries).
unreachable="$(git rev-parse HEAD)" &&
git reset --hard HEAD^ &&
git tag -d unreachable &&
git reflog expire --all --expire=all &&
# Presumably this places the now-unreferenced objects in a cruft pack.
git repack --cruft -d &&
# Additionally write a pack built from just that commit name on stdin.
echo $unreachable | git pack-objects .git/objects/pack/pack &&
test_commit new &&
# Point a branch at the commit again so that it is referenced once more.
git update-ref refs/heads/other $unreachable &&
git repack --geometric=2 -d --write-midx --write-bitmap-index
'
test_done