diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index 71b9682485..b78175fbe1 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -94,13 +94,24 @@ base-name:: included packs (those not beginning with `^`), excluding any objects listed in the excluded packs (beginning with `^`). + -When `mode` is "follow", objects from packs not listed on stdin receive -special treatment. Objects within unlisted packs will be included if -those objects are (1) reachable from the included packs, and (2) not -found in any excluded packs. This mode is useful, for example, to -resurrect once-unreachable objects found in cruft packs to generate -packs which are closed under reachability up to the boundary set by the -excluded packs. +When `mode` is "follow" packs may additionally be prefixed with `!`, +indicating that they are excluded but not necessarily closed under +reachability. In addition to objects in included packs, the resulting +pack may include additional objects based on the following: ++ +-- +* If any packs are marked with `!`, then objects reachable from such + packs or included ones via objects outside of excluded-closed packs + will be included. In this case, all `^` packs are treated as closed + under reachability. +* Otherwise (if there are no `!` packs), objects within unlisted packs + will be included if those objects are (1) reachable from the + included packs, and (2) not found in any excluded packs. +-- ++ +This mode is useful, for example, to resurrect once-unreachable +objects found in cruft packs to generate packs which are closed under +reachability up to the boundary set by the excluded packs. + Incompatible with `--revs`, or options that imply `--revs` (such as `--all`), with the exception of `--unpacked`, which is compatible. diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 88388254d9..dd2480a73d 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -28,6 +28,7 @@ #include "reachable.h" #include "oid-array.h" #include "strvec.h" +#include "strmap.h" #include "list.h" #include "packfile.h" #include "object-file.h" @@ -217,6 +218,7 @@ static int have_non_local_packs; static int incremental; static int ignore_packed_keep_on_disk; static int ignore_packed_keep_in_core; +static int ignore_packed_keep_in_core_open; static int ignore_packed_keep_in_core_has_cruft; static int allow_ofs_delta; static struct pack_idx_option pack_idx_opts; @@ -1632,7 +1634,8 @@ static int want_found_object(const struct object_id *oid, int exclude, /* * Then handle .keep first, as we have a fast(er) path there. */ - if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) { + if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core || + ignore_packed_keep_in_core_open) { /* * Set the flags for the kept-pack cache to be the ones we want * to ignore. @@ -1646,6 +1649,8 @@ static int want_found_object(const struct object_id *oid, int exclude, flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) flags |= KEPT_PACK_IN_CORE; + if (ignore_packed_keep_in_core_open) + flags |= KEPT_PACK_IN_CORE_OPEN; /* * If the object is in a pack that we want to ignore, *and* we @@ -1657,6 +1662,8 @@ static int want_found_object(const struct object_id *oid, int exclude, return 0; if (ignore_packed_keep_in_core && p->pack_keep_in_core) return 0; + if (ignore_packed_keep_in_core_open && p->pack_keep_in_core_open) + return 0; if (has_object_kept_pack(p->repo, oid, flags)) return 0; } else { @@ -3756,6 +3763,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, void *_data) { off_t ofs; + struct object_info oi = OBJECT_INFO_INIT; enum object_type type = OBJ_NONE; display_progress(progress_state, ++nr_seen); @@ -3763,29 +3771,34 @@ static int add_object_entry_from_pack(const struct object_id *oid, if (have_duplicate_entry(oid, 0)) return 0; + stdin_packs_found_nr++; + ofs = nth_packed_object_offset(p, pos); + + oi.typep = &type; + if (packed_object_info(p, ofs, &oi) < 0) { + die(_("could not get type of object %s in pack %s"), + oid_to_hex(oid), p->pack_name); + } else if (type == OBJ_COMMIT) { + struct rev_info *revs = _data; + /* + * commits in included packs are used as starting points + * for the subsequent revision walk + * + * Note that we do want to walk through commits that are + * present in excluded-open ('!') packs to pick up any + * objects reachable from them not present in the + * excluded-closed ('^') packs. + * + * However, we'll only add those objects to the packing + * list after checking `want_object_in_pack()` below. + */ + add_pending_oid(revs, NULL, oid, 0); + } + if (!want_object_in_pack(oid, 0, &p, &ofs)) return 0; - if (p) { - struct object_info oi = OBJECT_INFO_INIT; - - oi.typep = &type; - if (packed_object_info(p, ofs, &oi) < 0) { - die(_("could not get type of object %s in pack %s"), - oid_to_hex(oid), p->pack_name); - } else if (type == OBJ_COMMIT) { - struct rev_info *revs = _data; - /* - * commits in included packs are used as starting points for the - * subsequent revision walk - */ - add_pending_oid(revs, NULL, oid, 0); - } - - stdin_packs_found_nr++; - } - create_object_entry(oid, type, 0, 0, 0, p, ofs); return 0; @@ -3835,87 +3848,78 @@ static void show_commit_pack_hint(struct commit *commit, void *data) } +/* + * stdin_pack_info_kind specifies how a pack specified over stdin + * should be treated when pack-objects is invoked with --stdin-packs. + * + * - STDIN_PACK_INCLUDE: objects in any packs with this flag bit set + * should be included in the output pack, unless they appear in an + * excluded pack. + * + * - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag + * bit set should be excluded from the output pack. + * + * - STDIN_PACK_EXCLUDE_OPEN: objects in any packs with this flag + * bit set should be excluded from the output pack, but are not + * guaranteed to be closed under reachability. + * + * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE or + * STDIN_PACK_EXCLUDE_OPEN are used as traversal tips when invoked + * with --stdin-packs=follow. + */ +enum stdin_pack_info_kind { + STDIN_PACK_INCLUDE = (1<<0), + STDIN_PACK_EXCLUDE_CLOSED = (1<<1), + STDIN_PACK_EXCLUDE_OPEN = (1<<2), +}; + +struct stdin_pack_info { + struct packed_git *p; + enum stdin_pack_info_kind kind; +}; + static int pack_mtime_cmp(const void *_a, const void *_b) { - struct packed_git *a = ((const struct string_list_item*)_a)->util; - struct packed_git *b = ((const struct string_list_item*)_b)->util; + struct stdin_pack_info *a = ((const struct string_list_item*)_a)->util; + struct stdin_pack_info *b = ((const struct string_list_item*)_b)->util; /* * order packs by descending mtime so that objects are laid out * roughly as newest-to-oldest */ - if (a->mtime < b->mtime) + if (a->p->mtime < b->p->mtime) return 1; - else if (b->mtime < a->mtime) + else if (b->p->mtime < a->p->mtime) return -1; else return 0; } -static void read_packs_list_from_stdin(struct rev_info *revs) +static int stdin_packs_include_check_obj(struct object *obj, void *data UNUSED) { - struct strbuf buf = STRBUF_INIT; - struct string_list include_packs = STRING_LIST_INIT_DUP; - struct string_list exclude_packs = STRING_LIST_INIT_DUP; - struct string_list_item *item = NULL; - struct packed_git *p; + return !has_object_kept_pack(to_pack.repo, &obj->oid, + KEPT_PACK_IN_CORE); +} - while (strbuf_getline(&buf, stdin) != EOF) { - if (!buf.len) - continue; +static int stdin_packs_include_check(struct commit *commit, void *data) +{ + return stdin_packs_include_check_obj((struct object *)commit, data); +} - if (*buf.buf == '^') - string_list_append(&exclude_packs, buf.buf + 1); - else - string_list_append(&include_packs, buf.buf); +static void stdin_packs_add_pack_entries(struct strmap *packs, + struct rev_info *revs) +{ + struct string_list keys = STRING_LIST_INIT_NODUP; + struct string_list_item *item; + struct hashmap_iter iter; + struct strmap_entry *entry; - strbuf_reset(&buf); - } + strmap_for_each_entry(packs, &iter, entry) { + struct stdin_pack_info *info = entry->value; + if (!info->p) + die(_("could not find pack '%s'"), entry->key); - string_list_sort_u(&include_packs, 0); - string_list_sort_u(&exclude_packs, 0); - - repo_for_each_pack(the_repository, p) { - const char *pack_name = pack_basename(p); - - if ((item = string_list_lookup(&include_packs, pack_name))) { - if (exclude_promisor_objects && p->pack_promisor) - die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name); - item->util = p; - } - if ((item = string_list_lookup(&exclude_packs, pack_name))) - item->util = p; - } - - /* - * Arguments we got on stdin may not even be packs. First - * check that to avoid segfaulting later on in - * e.g. pack_mtime_cmp(), excluded packs are handled below. - * - * Since we first parsed our STDIN and then sorted the input - * lines the pack we error on will be whatever line happens to - * sort first. This is lazy, it's enough that we report one - * bad case here, we don't need to report the first/last one, - * or all of them. - */ - for_each_string_list_item(item, &include_packs) { - struct packed_git *p = item->util; - if (!p) - die(_("could not find pack '%s'"), item->string); - if (!is_pack_valid(p)) - die(_("packfile %s cannot be accessed"), p->pack_name); - } - - /* - * Then, handle all of the excluded packs, marking them as - * kept in-core so that later calls to add_object_entry() - * discards any objects that are also found in excluded packs. - */ - for_each_string_list_item(item, &exclude_packs) { - struct packed_git *p = item->util; - if (!p) - die(_("could not find pack '%s'"), item->string); - p->pack_keep_in_core = 1; + string_list_append(&keys, entry->key)->util = info; } /* @@ -3923,19 +3927,118 @@ static void read_packs_list_from_stdin(struct rev_info *revs) * string_list_item's ->util pointer, which string_list_sort() does not * provide. */ - QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp); + QSORT(keys.items, keys.nr, pack_mtime_cmp); - for_each_string_list_item(item, &include_packs) { - struct packed_git *p = item->util; - for_each_object_in_pack(p, - add_object_entry_from_pack, - revs, - ODB_FOR_EACH_OBJECT_PACK_ORDER); + for_each_string_list_item(item, &keys) { + struct stdin_pack_info *info = item->util; + + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * When open-excluded packs ("!") are present, stop + * the parent walk at closed-excluded ("^") packs. + * Objects behind a "^" boundary are guaranteed to + * have closure and should not be rescued. + */ + revs->include_check = stdin_packs_include_check; + revs->include_check_obj = stdin_packs_include_check_obj; + } + + if ((info->kind & STDIN_PACK_INCLUDE) || + (info->kind & STDIN_PACK_EXCLUDE_OPEN)) + for_each_object_in_pack(info->p, + add_object_entry_from_pack, + revs, + ODB_FOR_EACH_OBJECT_PACK_ORDER); } + string_list_clear(&keys, 0); +} + +static void stdin_packs_read_input(struct rev_info *revs, + enum stdin_packs_mode mode) +{ + struct strbuf buf = STRBUF_INIT; + struct strmap packs = STRMAP_INIT; + struct packed_git *p; + + while (strbuf_getline(&buf, stdin) != EOF) { + struct stdin_pack_info *info; + enum stdin_pack_info_kind kind = STDIN_PACK_INCLUDE; + const char *key = buf.buf; + + if (!*key) + continue; + else if (*key == '^') + kind = STDIN_PACK_EXCLUDE_CLOSED; + else if (*key == '!' && mode == STDIN_PACKS_MODE_FOLLOW) + kind = STDIN_PACK_EXCLUDE_OPEN; + + if (kind != STDIN_PACK_INCLUDE) + key++; + + info = strmap_get(&packs, key); + if (!info) { + CALLOC_ARRAY(info, 1); + strmap_put(&packs, key, info); + } + + info->kind |= kind; + + strbuf_reset(&buf); + } + + repo_for_each_pack(the_repository, p) { + struct stdin_pack_info *info; + + info = strmap_get(&packs, pack_basename(p)); + if (!info) + continue; + + if (info->kind & STDIN_PACK_INCLUDE) { + if (exclude_promisor_objects && p->pack_promisor) + die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name); + + /* + * Arguments we got on stdin may not even be + * packs. First check that to avoid segfaulting + * later on in e.g. pack_mtime_cmp(), excluded + * packs are handled below. + */ + if (!is_pack_valid(p)) + die(_("packfile %s cannot be accessed"), p->pack_name); + } + + if (info->kind & STDIN_PACK_EXCLUDE_CLOSED) { + /* + * Marking excluded packs as kept in-core so + * that later calls to add_object_entry() + * discards any objects that are also found in + * excluded packs. + */ + p->pack_keep_in_core = 1; + } + + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * Marking excluded open packs as kept in-core + * (open) for the same reason as we marked + * exclude closed packs as kept in-core. + * + * Use a separate flag here to ensure we don't + * halt our traversal at these packs, since they + * are not guaranteed to have closure. + * + */ + p->pack_keep_in_core_open = 1; + } + + info->p = p; + } + + stdin_packs_add_pack_entries(&packs, revs); + strbuf_release(&buf); - string_list_clear(&include_packs, 0); - string_list_clear(&exclude_packs, 0); + strmap_clear(&packs, 1); } static void add_unreachable_loose_objects(struct rev_info *revs); @@ -3972,7 +4075,15 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - read_packs_list_from_stdin(&revs); + if (mode == STDIN_PACKS_MODE_FOLLOW) { + /* + * In '--stdin-packs=follow' mode, additionally ignore + * objects in excluded-open packs to prevent them from + * appearing in the resulting pack. + */ + ignore_packed_keep_in_core_open = 1; + } + stdin_packs_read_input(&revs, mode); if (rev_list_unpacked) add_unreachable_loose_objects(&revs); @@ -3983,6 +4094,8 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) show_object_pack_hint, &mode); + release_revisions(&revs); + trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", stdin_packs_found_nr); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", diff --git a/builtin/repack.c b/builtin/repack.c index f6bb04bef7..4c5a82c2c8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -369,8 +369,23 @@ int cmd_repack(int argc, */ for (i = 0; i < geometry.split; i++) fprintf(in, "%s\n", pack_basename(geometry.pack[i])); - for (i = geometry.split; i < geometry.pack_nr; i++) - fprintf(in, "^%s\n", pack_basename(geometry.pack[i])); + for (i = geometry.split; i < geometry.pack_nr; i++) { + const char *basename = pack_basename(geometry.pack[i]); + char marker = '^'; + + if (!midx_must_contain_cruft && + !string_list_has_string(&existing.midx_packs, + basename)) { + /* + * Assume non-MIDX'd packs are not + * necessarily closed under + * reachability. + */ + marker = '!'; + } + + fprintf(in, "%c%s\n", marker, basename); + } fclose(in); } diff --git a/packfile.c b/packfile.c index ee9c7ea1d1..48c88748b6 100644 --- a/packfile.c +++ b/packfile.c @@ -2244,7 +2244,8 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st struct packed_git *p = e->pack; if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || - (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE)) || + (p->pack_keep_in_core_open && (flags & KEPT_PACK_IN_CORE_OPEN))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } diff --git a/packfile.h b/packfile.h index 45b35973f0..6e8802e2ed 100644 --- a/packfile.h +++ b/packfile.h @@ -28,6 +28,7 @@ struct packed_git { unsigned pack_local:1, pack_keep:1, pack_keep_in_core:1, + pack_keep_in_core_open:1, freshened:1, do_not_close:1, pack_promisor:1, @@ -266,6 +267,7 @@ int packfile_store_freshen_object(struct packfile_store *store, enum kept_pack_type { KEPT_PACK_ON_DISK = (1 << 0), KEPT_PACK_IN_CORE = (1 << 1), + KEPT_PACK_IN_CORE_OPEN = (1 << 2), }; /* diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 7eb79bc2cd..c74b5861af 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -415,4 +415,109 @@ test_expect_success '--stdin-packs=follow tolerates missing commits' ' stdin_packs__follow_with_only HEAD HEAD^{tree} ' +test_expect_success '--stdin-packs=follow with open-excluded packs' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + git config set maintenance.auto false && + + git branch -M main && + + # Create the following commit structure: + # + # A <-- B <-- D (main) + # ^ + # \ + # C (other) + test_commit A && + test_commit B && + git checkout -B other && + test_commit C && + git checkout main && + test_commit D && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + D="$(echo B..D | git pack-objects --revs $packdir/pack)" && + + C_ONLY="$(git rev-parse other | git pack-objects $packdir/pack)" && + + git prune-packed && + + # Create a pack using --stdin-packs=follow where: + # + # - pack D is included, + # - pack C_ONLY is excluded, but open, + # - pack B is excluded, but closed, and + # - packs A and C are unknown + # + # The resulting pack should therefore contain: + # + # - objects from the included pack D, + # - A.t (rescued via D^{tree}), and + # - C^{tree} and C.t (rescued via pack C_ONLY) + # + # , but should omit: + # + # - C (excluded via C_ONLY), + # - objects from pack B (trivially excluded-closed) + # - A and A^{tree} (ancestors of B) + P=$(git pack-objects --stdin-packs=follow $packdir/pack <<-EOF + pack-$D.pack + !pack-$C_ONLY.pack + ^pack-$B.pack + EOF + ) && + + { + objects_in_packs $D && + git rev-parse A:A.t "C^{tree}" C:C.t + } >expect.raw && + sort expect.raw >expect && + + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + +test_expect_success '--stdin-packs with !-delimited pack without follow' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + test_commit A && + test_commit B && + test_commit C && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + + cat >in <<-EOF && + !pack-$A.pack + pack-$B.pack + pack-$C.pack + EOF + + # Without --stdin-packs=follow, we treat the first + # line of input as a literal packfile name, and thus + # expect pack-objects to complain of a missing pack + test_must_fail git pack-objects --stdin-packs --stdout \ + >/dev/null err && + test_grep "could not find pack .!pack-$A.pack." err && + + # With --stdin-packs=follow, we treat the second line + # of input as indicating pack-$A.pack is an excluded + # open pack, and thus expect pack-objects to succeed + P=$(git pack-objects --stdin-packs=follow $packdir/pack expect && + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + test_done diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index aa2e2e6ad8..9e03b04315 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -869,4 +869,26 @@ test_expect_success 'repack --write-midx includes cruft when already geometric' ) ' +test_expect_success 'repack rescues once-cruft objects above geometric split' ' + git config repack.midxMustContainCruft false && + + test_commit reachable && + test_commit unreachable && + + unreachable="$(git rev-parse HEAD)" && + + git reset --hard HEAD^ && + git tag -d unreachable && + git reflog expire --all --expire=all && + + git repack --cruft -d && + + echo $unreachable | git pack-objects .git/objects/pack/pack && + + test_commit new && + + git update-ref refs/heads/other $unreachable && + git repack --geometric=2 -d --write-midx --write-bitmap-index +' + test_done