object-file: move logic to compute packed abbreviation length

As in the preceding commit, move the logic that computes the minimum
prefix length required to make a given object ID unique within the
packfile store into a new function `packfile_store_find_abbrev_len()`
that is part of "packfile.c". This prepares for making the logic fully
generic via pluggable object databases.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Patrick Steinhardt
2026-03-20 08:07:39 +01:00
committed by Junio C Hamano
parent ab3ab1038d
commit 6c2ede6e4a
3 changed files with 128 additions and 123 deletions

View File

@@ -582,115 +582,6 @@ static unsigned msb(unsigned long val)
return r;
}
/*
 * State threaded through the helpers that compute the minimum unique
 * abbreviation length of an object ID.
 */
struct min_abbrev_data {
/*
 * Seeded with the caller's starting length; the pack and MIDX helpers
 * reset it to 0 and then to cur_len. Nothing visible here reads it.
 */
unsigned int init_len;
/* Abbreviation length found so far; the helpers only ever increase it. */
unsigned int cur_len;
/* Repository whose object database is searched. */
struct repository *repo;
/* Object ID that is being abbreviated. */
const struct object_id *oid;
};
/*
 * Compare "oid" with the object ID being abbreviated (mad->oid). Unless
 * their common hex prefix spans the full hash length, make sure
 * mad->cur_len is at least one hex digit longer than that prefix.
 *
 * Always returns 0.
 */
static int extend_abbrev_len(const struct object_id *oid,
			     struct min_abbrev_data *mad)
{
	unsigned shared = oid_common_prefix_hexlen(oid, mad->oid);

	/* The prefix covers the whole hex representation: nothing to extend. */
	if (shared == hash_algos[oid->algo].hexsz)
		return 0;

	if (mad->cur_len <= shared)
		mad->cur_len = shared + 1;
	return 0;
}
static void find_abbrev_len_for_midx(struct multi_pack_index *m,
struct min_abbrev_data *mad)
{
for (; m; m = m->base_midx) {
int match = 0;
uint32_t num, first = 0;
struct object_id oid;
const struct object_id *mad_oid;
if (!m->num_objects)
continue;
num = m->num_objects + m->num_objects_in_base;
mad_oid = mad->oid;
match = bsearch_one_midx(mad_oid, m, &first);
/*
* first is now the position in the packfile where we
* would insert mad->hash if it does not exist (or the
* position of mad->hash if it does exist). Hence, we
* consider a maximum of two objects nearby for the
* abbreviation length.
*/
mad->init_len = 0;
if (!match) {
if (nth_midxed_object_oid(&oid, m, first))
extend_abbrev_len(&oid, mad);
} else if (first < num - 1) {
if (nth_midxed_object_oid(&oid, m, first + 1))
extend_abbrev_len(&oid, mad);
}
if (first > 0) {
if (nth_midxed_object_oid(&oid, m, first - 1))
extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
}
}
static void find_abbrev_len_for_pack(struct packed_git *p,
struct min_abbrev_data *mad)
{
int match = 0;
uint32_t num, first = 0;
struct object_id oid;
const struct object_id *mad_oid;
if (p->multi_pack_index)
return;
if (open_pack_index(p) || !p->num_objects)
return;
num = p->num_objects;
mad_oid = mad->oid;
match = bsearch_pack(mad_oid, p, &first);
/*
* first is now the position in the packfile where we would insert
* mad->hash if it does not exist (or the position of mad->hash if
* it does exist). Hence, we consider a maximum of two objects
* nearby for the abbreviation length.
*/
mad->init_len = 0;
if (!match) {
if (!nth_packed_object_id(&oid, p, first))
extend_abbrev_len(&oid, mad);
} else if (first < num - 1) {
if (!nth_packed_object_id(&oid, p, first + 1))
extend_abbrev_len(&oid, mad);
}
if (first > 0) {
if (!nth_packed_object_id(&oid, p, first - 1))
extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
}
/*
 * Extend mad->cur_len using all packed objects of mad->repo: first the
 * multi-pack index of every object source, then each individual pack.
 */
static void find_abbrev_len_packed(struct min_abbrev_data *mad)
{
	struct odb_source *source;
	struct packed_git *pack;

	odb_prepare_alternates(mad->repo->objects);

	source = mad->repo->objects->sources;
	while (source) {
		struct multi_pack_index *midx = get_multi_pack_index(source);

		if (midx)
			find_abbrev_len_for_midx(midx, mad);
		source = source->next;
	}

	repo_for_each_pack(mad->repo, pack)
		find_abbrev_len_for_pack(pack, mad);
}
void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo,
const struct object_id *oid, int abbrev_len)
{
@@ -707,14 +598,14 @@ void strbuf_add_unique_abbrev(struct strbuf *sb, const struct object_id *oid,
}
int repo_find_unique_abbrev_r(struct repository *r, char *hex,
const struct object_id *oid, int len)
const struct object_id *oid, int min_len)
{
const struct git_hash_algo *algo =
oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
struct min_abbrev_data mad;
const unsigned hexsz = algo->hexsz;
unsigned len;
if (len < 0) {
if (min_len < 0) {
unsigned long count;
if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
@@ -738,25 +629,23 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
*/
if (len < FALLBACK_DEFAULT_ABBREV)
len = FALLBACK_DEFAULT_ABBREV;
} else {
len = min_len;
}
oid_to_hex_r(hex, oid);
if (len >= hexsz || !len)
return hexsz;
mad.repo = r;
mad.init_len = len;
mad.cur_len = len;
mad.oid = oid;
find_abbrev_len_packed(&mad);
odb_prepare_alternates(r->objects);
for (struct odb_source *s = r->objects->sources; s; s = s->next)
odb_source_loose_find_abbrev_len(s, mad.oid, mad.cur_len, &mad.cur_len);
for (struct odb_source *s = r->objects->sources; s; s = s->next) {
struct odb_source_files *files = odb_source_files_downcast(s);
packfile_store_find_abbrev_len(files->packed, oid, len, &len);
odb_source_loose_find_abbrev_len(s, oid, len, &len);
}
hex[mad.cur_len] = 0;
return mad.cur_len;
hex[len] = 0;
return len;
}
const char *repo_find_unique_abbrev(struct repository *r,

View File

@@ -2597,6 +2597,117 @@ out:
return ret;
}
/*
 * Unless the common hex prefix of "a" and "b" spans the full hash
 * length, make sure "*out" is at least one hex digit longer than that
 * prefix. "*out" is never decreased.
 *
 * Always returns 0.
 */
static int extend_abbrev_len(const struct object_id *a,
			     const struct object_id *b,
			     unsigned *out)
{
	unsigned shared = oid_common_prefix_hexlen(a, b);

	/* The prefix covers the whole hex representation: nothing to extend. */
	if (shared == hash_algos[a->algo].hexsz)
		return 0;

	if (*out <= shared)
		*out = shared + 1;
	return 0;
}
static void find_abbrev_len_for_midx(struct multi_pack_index *m,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
unsigned len = min_len;
for (; m; m = m->base_midx) {
int match = 0;
uint32_t num, first = 0;
struct object_id found_oid;
if (!m->num_objects)
continue;
num = m->num_objects + m->num_objects_in_base;
match = bsearch_one_midx(oid, m, &first);
/*
* first is now the position in the packfile where we
* would insert the object ID if it does not exist (or the
* position of the object ID if it does exist). Hence, we
* consider a maximum of two objects nearby for the
* abbreviation length.
*/
if (!match) {
if (nth_midxed_object_oid(&found_oid, m, first))
extend_abbrev_len(&found_oid, oid, &len);
} else if (first < num - 1) {
if (nth_midxed_object_oid(&found_oid, m, first + 1))
extend_abbrev_len(&found_oid, oid, &len);
}
if (first > 0) {
if (nth_midxed_object_oid(&found_oid, m, first - 1))
extend_abbrev_len(&found_oid, oid, &len);
}
}
*out = len;
}
static void find_abbrev_len_for_pack(struct packed_git *p,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
int match;
uint32_t num, first = 0;
struct object_id found_oid;
unsigned len = min_len;
num = p->num_objects;
match = bsearch_pack(oid, p, &first);
/*
* first is now the position in the packfile where we would insert
* the object ID if it does not exist (or the position of mad->hash if
* it does exist). Hence, we consider a maximum of two objects
* nearby for the abbreviation length.
*/
if (!match) {
if (!nth_packed_object_id(&found_oid, p, first))
extend_abbrev_len(&found_oid, oid, &len);
} else if (first < num - 1) {
if (!nth_packed_object_id(&found_oid, p, first + 1))
extend_abbrev_len(&found_oid, oid, &len);
}
if (first > 0) {
if (!nth_packed_object_id(&found_oid, p, first - 1))
extend_abbrev_len(&found_oid, oid, &len);
}
*out = len;
}
int packfile_store_find_abbrev_len(struct packfile_store *store,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
struct packfile_list_entry *e;
struct multi_pack_index *m;
m = get_multi_pack_index(store->source);
if (m)
find_abbrev_len_for_midx(m, oid, min_len, &min_len);
for (e = packfile_store_get_packs(store); e; e = e->next) {
if (e->pack->multi_pack_index)
continue;
if (open_pack_index(e->pack) || !e->pack->num_objects)
continue;
find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len);
}
*out = min_len;
return 0;
}
struct add_promisor_object_data {
struct repository *repo;
struct oidset *set;

View File

@@ -369,6 +369,11 @@ int packfile_store_for_each_object(struct packfile_store *store,
void *cb_data,
const struct odb_for_each_object_options *opts);
/*
 * Compute the abbreviation length required to tell `oid` apart from the
 * packed objects in `store`. The search starts at `min_len` hex digits and
 * only ever lengthens it; the resulting length is written to `*out`.
 *
 * Returns 0.
 */
int packfile_store_find_abbrev_len(struct packfile_store *store,
const struct object_id *oid,
unsigned min_len,
unsigned *out);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
#define PACKDIR_FILE_IDX 2