pack-objects: refactor read_packs_list_from_stdin() to use strmap

The '--stdin-packs' mode of pack-objects maintains two separate
string_lists: one for included packs, and one for excluded packs. Each
list stores the pack basename as a string and the corresponding
`packed_git` pointer in its `->util` field.

This works, but makes it awkward to extend the set of pack "kinds" that
pack-objects can accept via stdin, since each new kind would need its
own string_list and duplicated handling. A future commit will want to do
just this, so prepare for that change by handling the various "kinds" of
packs specified over stdin in a more generic fashion.

Namely, replace the two `string_list`s with a single `strmap` keyed on
the pack basename, with values pointing to a new `struct
stdin_pack_info`. This struct tracks both the `packed_git` pointer and a
`kind` bitfield indicating whether the pack was specified as included or
excluded.

Extract the logic for sorting packs by mtime and adding their objects
into a separate `stdin_packs_add_pack_entries()` helper.

While we could have used a `string_list`, we must handle the case where
the same pack is specified more than once. With a `string_list` only, we
would have to pay a quadratic cost to either (a) insert elements into
their sorted positions, or (b) a repeated linear search, which is
accidentally quadratic. For that reason, use a strmap instead.

This patch does not include any functional changes.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Taylor Blau
2026-03-27 16:06:46 -04:00
committed by Junio C Hamano
parent 81e2906437
commit d31d1f2e06

View File

@@ -28,6 +28,7 @@
#include "reachable.h"
#include "oid-array.h"
#include "strvec.h"
#include "strmap.h"
#include "list.h"
#include "packfile.h"
#include "object-file.h"
@@ -3820,87 +3821,61 @@ static void show_commit_pack_hint(struct commit *commit, void *data)
}
/*
* stdin_pack_info_kind specifies how a pack specified over stdin
* should be treated when pack-objects is invoked with --stdin-packs.
*
* - STDIN_PACK_INCLUDE: objects in any packs with this flag bit set
* should be included in the output pack, unless they appear in an
* excluded pack.
*
* - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag
* bit set should be excluded from the output pack.
*
* Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are
* used as traversal tips when invoked with --stdin-packs=follow.
*/
enum stdin_pack_info_kind {
STDIN_PACK_INCLUDE = (1<<0),
STDIN_PACK_EXCLUDE_CLOSED = (1<<1),
};
struct stdin_pack_info {
struct packed_git *p;
enum stdin_pack_info_kind kind;
};
static int pack_mtime_cmp(const void *_a, const void *_b)
{
struct packed_git *a = ((const struct string_list_item*)_a)->util;
struct packed_git *b = ((const struct string_list_item*)_b)->util;
struct stdin_pack_info *a = ((const struct string_list_item*)_a)->util;
struct stdin_pack_info *b = ((const struct string_list_item*)_b)->util;
/*
* order packs by descending mtime so that objects are laid out
* roughly as newest-to-oldest
*/
if (a->mtime < b->mtime)
if (a->p->mtime < b->p->mtime)
return 1;
else if (b->mtime < a->mtime)
else if (b->p->mtime < a->p->mtime)
return -1;
else
return 0;
}
static void read_packs_list_from_stdin(struct rev_info *revs)
static void stdin_packs_add_pack_entries(struct strmap *packs,
struct rev_info *revs)
{
struct strbuf buf = STRBUF_INIT;
struct string_list include_packs = STRING_LIST_INIT_DUP;
struct string_list exclude_packs = STRING_LIST_INIT_DUP;
struct string_list_item *item = NULL;
struct packed_git *p;
struct string_list keys = STRING_LIST_INIT_NODUP;
struct string_list_item *item;
struct hashmap_iter iter;
struct strmap_entry *entry;
while (strbuf_getline(&buf, stdin) != EOF) {
if (!buf.len)
continue;
strmap_for_each_entry(packs, &iter, entry) {
struct stdin_pack_info *info = entry->value;
if (!info->p)
die(_("could not find pack '%s'"), entry->key);
if (*buf.buf == '^')
string_list_append(&exclude_packs, buf.buf + 1);
else
string_list_append(&include_packs, buf.buf);
strbuf_reset(&buf);
}
string_list_sort_u(&include_packs, 0);
string_list_sort_u(&exclude_packs, 0);
repo_for_each_pack(the_repository, p) {
const char *pack_name = pack_basename(p);
if ((item = string_list_lookup(&include_packs, pack_name))) {
if (exclude_promisor_objects && p->pack_promisor)
die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name);
item->util = p;
}
if ((item = string_list_lookup(&exclude_packs, pack_name)))
item->util = p;
}
/*
* Arguments we got on stdin may not even be packs. First
* check that to avoid segfaulting later on in
* e.g. pack_mtime_cmp(), excluded packs are handled below.
*
* Since we first parsed our STDIN and then sorted the input
* lines the pack we error on will be whatever line happens to
* sort first. This is lazy, it's enough that we report one
* bad case here, we don't need to report the first/last one,
* or all of them.
*/
for_each_string_list_item(item, &include_packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
if (!is_pack_valid(p))
die(_("packfile %s cannot be accessed"), p->pack_name);
}
/*
* Then, handle all of the excluded packs, marking them as
* kept in-core so that later calls to add_object_entry()
* discards any objects that are also found in excluded packs.
*/
for_each_string_list_item(item, &exclude_packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
p->pack_keep_in_core = 1;
string_list_append(&keys, entry->key)->util = info;
}
/*
@@ -3908,19 +3883,89 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
* string_list_item's ->util pointer, which string_list_sort() does not
* provide.
*/
QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
QSORT(keys.items, keys.nr, pack_mtime_cmp);
for_each_string_list_item(item, &include_packs) {
struct packed_git *p = item->util;
for_each_object_in_pack(p,
add_object_entry_from_pack,
revs,
ODB_FOR_EACH_OBJECT_PACK_ORDER);
for_each_string_list_item(item, &keys) {
struct stdin_pack_info *info = item->util;
if (info->kind & STDIN_PACK_INCLUDE)
for_each_object_in_pack(info->p,
add_object_entry_from_pack,
revs,
ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
string_list_clear(&keys, 0);
}
static void stdin_packs_read_input(struct rev_info *revs)
{
struct strbuf buf = STRBUF_INIT;
struct strmap packs = STRMAP_INIT;
struct packed_git *p;
while (strbuf_getline(&buf, stdin) != EOF) {
struct stdin_pack_info *info;
enum stdin_pack_info_kind kind = STDIN_PACK_INCLUDE;
const char *key = buf.buf;
if (!*key)
continue;
else if (*key == '^')
kind = STDIN_PACK_EXCLUDE_CLOSED;
if (kind != STDIN_PACK_INCLUDE)
key++;
info = strmap_get(&packs, key);
if (!info) {
CALLOC_ARRAY(info, 1);
strmap_put(&packs, key, info);
}
info->kind |= kind;
strbuf_reset(&buf);
}
repo_for_each_pack(the_repository, p) {
struct stdin_pack_info *info;
info = strmap_get(&packs, pack_basename(p));
if (!info)
continue;
if (info->kind & STDIN_PACK_INCLUDE) {
if (exclude_promisor_objects && p->pack_promisor)
die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name);
/*
* Arguments we got on stdin may not even be
* packs. First check that to avoid segfaulting
* later on in e.g. pack_mtime_cmp(), excluded
* packs are handled below.
*/
if (!is_pack_valid(p))
die(_("packfile %s cannot be accessed"), p->pack_name);
}
if (info->kind & STDIN_PACK_EXCLUDE_CLOSED) {
/*
* Marking excluded packs as kept in-core so
* that later calls to add_object_entry()
* discards any objects that are also found in
* excluded packs.
*/
p->pack_keep_in_core = 1;
}
info->p = p;
}
stdin_packs_add_pack_entries(&packs, revs);
strbuf_release(&buf);
string_list_clear(&include_packs, 0);
string_list_clear(&exclude_packs, 0);
strmap_clear(&packs, 1);
}
static void add_unreachable_loose_objects(struct rev_info *revs);
@@ -3957,7 +4002,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
/* avoids adding objects in excluded packs */
ignore_packed_keep_in_core = 1;
read_packs_list_from_stdin(&revs);
stdin_packs_read_input(&revs);
if (rev_list_unpacked)
add_unreachable_loose_objects(&revs);