From 736cef847cf788d90f39d15bb4be684bc4ba1013 Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Thu, 19 Mar 2026 22:49:06 +0000 Subject: [PATCH 01/15] object-file: fix sparse 'plain integer as NULL pointer' error Signed-off-by: Ramsay Jones Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 569ce6eaed..fa2ca60a59 100644 --- a/object-file.c +++ b/object-file.c @@ -1916,7 +1916,7 @@ int odb_source_loose_count_objects(struct odb_source *source, } else { *out = 0; ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, 0); + out, NULL); } out: From 1382e54a9c9e5f98271a943af9c10299c6ba934b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:27 +0100 Subject: [PATCH 02/15] oidtree: modernize the code a bit The "oidtree.c" subsystem is rather small and self-contained and tends to just work. It thus doesn't typically receive a lot of attention, which has as a consequence that it's coding style is somewhat dated nowadays. Modernize the style of this subsystem a bit: - Rename the `oidtree_iter()` function to `oidtree_each_cb()`. - Rename `struct oidtree_iter_data` to `struct oidtree_each_data` to match the renamed callback function type. - Rename parameters and variables to clarify their intent. - Add comments that explain what some of the functions do. - Adapt the return value of `oidtree_contains()` to be a boolean. This prepares for some changes to the subsystem that'll happen in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- oidtree.c | 59 ++++++++++++++++++++-------------------- oidtree.h | 42 +++++++++++++++++++++++----- t/unit-tests/u-oidtree.c | 14 +++++----- 3 files changed, 72 insertions(+), 43 deletions(-) diff --git a/oidtree.c b/oidtree.c index 324de94934..a4d10cd429 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,14 +6,6 @@ #include "oidtree.h" #include "hash.h" -struct oidtree_iter_data { - oidtree_iter fn; - void *arg; - size_t *last_nibble_at; - uint32_t algo; - uint8_t last_byte; -}; - void oidtree_init(struct oidtree *ot) { cb_init(&ot->tree); @@ -54,8 +46,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) cb_insert(&ot->tree, on, sizeof(*oid)); } - -int oidtree_contains(struct oidtree *ot, const struct object_id *oid) +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) { struct object_id k; size_t klen = sizeof(k); @@ -69,41 +60,51 @@ int oidtree_contains(struct oidtree *ot, const struct object_id *oid) klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < offsetof(struct object_id, algo)); - return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0; + return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen); } -static enum cb_next iter(struct cb_node *n, void *arg) +struct oidtree_each_data { + oidtree_each_cb cb; + void *cb_data; + size_t *last_nibble_at; + uint32_t algo; + uint8_t last_byte; +}; + +static enum cb_next iter(struct cb_node *n, void *cb_data) { - struct oidtree_iter_data *x = arg; + struct oidtree_each_data *data = cb_data; struct object_id k; /* Copy to provide 4-byte alignment needed by struct object_id. */ memcpy(&k, n->k, sizeof(k)); - if (x->algo != GIT_HASH_UNKNOWN && x->algo != k.algo) + if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) return CB_CONTINUE; - if (x->last_nibble_at) { - if ((k.hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0) + if (data->last_nibble_at) { + if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) return CB_CONTINUE; } - return x->fn(&k, x->arg); + return data->cb(&k, data->cb_data); } -void oidtree_each(struct oidtree *ot, const struct object_id *oid, - size_t oidhexsz, oidtree_iter fn, void *arg) +void oidtree_each(struct oidtree *ot, const struct object_id *prefix, + size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) { - size_t klen = oidhexsz / 2; - struct oidtree_iter_data x = { 0 }; - assert(oidhexsz <= GIT_MAX_HEXSZ); + struct oidtree_each_data data = { + .cb = cb, + .cb_data = cb_data, + .algo = prefix->algo, + }; + size_t klen = prefix_hex_len / 2; + assert(prefix_hex_len <= GIT_MAX_HEXSZ); - x.fn = fn; - x.arg = arg; - x.algo = oid->algo; - if (oidhexsz & 1) { - x.last_byte = oid->hash[klen]; - x.last_nibble_at = &klen; + if (prefix_hex_len & 1) { + data.last_byte = prefix->hash[klen]; + data.last_nibble_at = &klen; } - cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x); + + cb_each(&ot->tree, prefix->hash, klen, iter, &data); } diff --git a/oidtree.h b/oidtree.h index 77898f510a..0651401017 100644 --- a/oidtree.h +++ b/oidtree.h @@ -5,18 +5,46 @@ #include "hash.h" #include "mem-pool.h" +/* + * OID trees are an efficient storage for object IDs that use a critbit tree + * internally. Common prefixes are duplicated and object IDs are stored in a + * way that allow easy iteration over the objects in lexicographic order. As a + * consequence, operations that want to enumerate all object IDs that match a + * given prefix can be answered efficiently. + * + * Note that it is not (yet) possible to store data other than the object IDs + * themselves in this tree. + */ struct oidtree { struct cb_tree tree; struct mem_pool mem_pool; }; -void oidtree_init(struct oidtree *); -void oidtree_clear(struct oidtree *); -void oidtree_insert(struct oidtree *, const struct object_id *); -int oidtree_contains(struct oidtree *, const struct object_id *); +/* Initialize the oidtree so that it is ready for use. */ +void oidtree_init(struct oidtree *ot); -typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data); -void oidtree_each(struct oidtree *, const struct object_id *, - size_t oidhexsz, oidtree_iter, void *data); +/* + * Release all memory associated with the oidtree and reinitialize it for + * subsequent use. + */ +void oidtree_clear(struct oidtree *ot); + +/* Insert the object ID into the tree. */ +void oidtree_insert(struct oidtree *ot, const struct object_id *oid); + +/* Check whether the tree contains the given object ID. */ +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); + +/* Callback function used for `oidtree_each()`. */ +typedef enum cb_next (*oidtree_each_cb)(const struct object_id *oid, + void *cb_data); + +/* + * Iterate through all object IDs in the tree whose prefix matches the given + * object ID prefix and invoke the callback function on each of them. + */ +void oidtree_each(struct oidtree *ot, + const struct object_id *prefix, size_t prefix_hex_len, + oidtree_each_cb cb, void *cb_data); #endif /* OIDTREE_H */ diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index e6eede2740..def47c6795 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -24,7 +24,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n) return 0; } -static void check_contains(struct oidtree *ot, const char *hex, int expected) +static void check_contains(struct oidtree *ot, const char *hex, bool expected) { struct object_id oid; @@ -88,12 +88,12 @@ void test_oidtree__cleanup(void) void test_oidtree__contains(void) { FILL_TREE(&ot, "444", "1", "2", "3", "4", "5", "a", "b", "c", "d", "e"); - check_contains(&ot, "44", 0); - check_contains(&ot, "441", 0); - check_contains(&ot, "440", 0); - check_contains(&ot, "444", 1); - check_contains(&ot, "4440", 1); - check_contains(&ot, "4444", 0); + check_contains(&ot, "44", false); + check_contains(&ot, "441", false); + check_contains(&ot, "440", false); + check_contains(&ot, "444", true); + check_contains(&ot, "4440", true); + check_contains(&ot, "4444", false); } void test_oidtree__each(void) From fe446b01aeaab307adcbfb39d4aaa72c37afbcda Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:28 +0100 Subject: [PATCH 03/15] oidtree: extend iteration to allow for arbitrary return codes The interface `cb_each()` iterates through a crit-bit tree and calls a specific callback function for each of the contained items. The callback function is expected to return either: - `CB_CONTINUE` in case iteration shall continue. - `CB_BREAK` to abort iteration. This is needlessly restrictive though, as callers may want to return arbitrary values and have them be bubbled up to the `cb_each()` call site. In fact, this is a rather common pattern we have: whenever such a callback function returns a non-zero error code, we abort iteration and bubble up the code as-is. Refactor both the crit-bit tree and oidtree subsystems to behave accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- cbtree.c | 21 ++++++++++++--------- cbtree.h | 17 +++++++++-------- object-name.c | 4 ++-- oidtree.c | 12 ++++++------ oidtree.h | 18 ++++++++++++------ t/unit-tests/u-oidtree.c | 4 ++-- 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/cbtree.c b/cbtree.c index cf8cf75b89..4ab794bddc 100644 --- a/cbtree.c +++ b/cbtree.c @@ -96,26 +96,28 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) return p && !memcmp(p->k, k, klen) ? p : NULL; } -static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg) +static int cb_descend(struct cb_node *p, cb_iter fn, void *arg) { if (1 & (uintptr_t)p) { struct cb_node *q = cb_node_of(p); - enum cb_next n = cb_descend(q->child[0], fn, arg); - - return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg); + int ret = cb_descend(q->child[0], fn, arg); + if (ret) + return ret; + return cb_descend(q->child[1], fn, arg); } else { return fn(p, arg); } } -void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, - cb_iter fn, void *arg) +int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, + cb_iter fn, void *arg) { struct cb_node *p = t->root; struct cb_node *top = p; size_t i = 0; - if (!p) return; /* empty tree */ + if (!p) + return 0; /* empty tree */ /* Walk tree, maintaining top pointer */ while (1 & (uintptr_t)p) { @@ -130,7 +132,8 @@ void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, for (i = 0; i < klen; i++) { if (p->k[i] != kpfx[i]) - return; /* "best" match failed */ + return 0; /* "best" match failed */ } - cb_descend(top, fn, arg); + + return cb_descend(top, fn, arg); } diff --git a/cbtree.h b/cbtree.h index 43193abdda..c374b1b3db 100644 --- a/cbtree.h +++ b/cbtree.h @@ -30,11 +30,6 @@ struct cb_tree { struct cb_node *root; }; -enum cb_next { - CB_CONTINUE = 0, - CB_BREAK = 1 -}; - #define CBTREE_INIT { 0 } static inline void cb_init(struct cb_tree *t) @@ -46,9 +41,15 @@ static inline void cb_init(struct cb_tree *t) struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen); struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen); -typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg); +/* + * Callback invoked by `cb_each()` for each node in the critbit tree. A return + * value of 0 will cause the iteration to continue, a non-zero return code will + * cause iteration to abort. The error code will be relayed back from + * `cb_each()` in that case. + */ +typedef int (*cb_iter)(struct cb_node *, void *arg); -void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, - cb_iter, void *arg); +int cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, + cb_iter, void *arg); #endif /* CBTREE_H */ diff --git a/object-name.c b/object-name.c index e5adec4c9d..a24a1b48e1 100644 --- a/object-name.c +++ b/object-name.c @@ -103,12 +103,12 @@ static void update_candidates(struct disambiguate_state *ds, const struct object static int match_hash(unsigned, const unsigned char *, const unsigned char *); -static enum cb_next match_prefix(const struct object_id *oid, void *arg) +static int match_prefix(const struct object_id *oid, void *arg) { struct disambiguate_state *ds = arg; /* no need to call match_hash, oidtree_each did prefix match */ update_candidates(ds, oid); - return ds->ambiguous ? CB_BREAK : CB_CONTINUE; + return ds->ambiguous; } static void find_short_object_filename(struct disambiguate_state *ds) diff --git a/oidtree.c b/oidtree.c index a4d10cd429..ab9fe7ec7a 100644 --- a/oidtree.c +++ b/oidtree.c @@ -71,7 +71,7 @@ struct oidtree_each_data { uint8_t last_byte; }; -static enum cb_next iter(struct cb_node *n, void *cb_data) +static int iter(struct cb_node *n, void *cb_data) { struct oidtree_each_data *data = cb_data; struct object_id k; @@ -80,18 +80,18 @@ static enum cb_next iter(struct cb_node *n, void *cb_data) memcpy(&k, n->k, sizeof(k)); if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) - return CB_CONTINUE; + return 0; if (data->last_nibble_at) { if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) - return CB_CONTINUE; + return 0; } return data->cb(&k, data->cb_data); } -void oidtree_each(struct oidtree *ot, const struct object_id *prefix, - size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) +int oidtree_each(struct oidtree *ot, const struct object_id *prefix, + size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) { struct oidtree_each_data data = { .cb = cb, @@ -106,5 +106,5 @@ void oidtree_each(struct oidtree *ot, const struct object_id *prefix, data.last_nibble_at = &klen; } - cb_each(&ot->tree, prefix->hash, klen, iter, &data); + return cb_each(&ot->tree, prefix->hash, klen, iter, &data); } diff --git a/oidtree.h b/oidtree.h index 0651401017..2b7bad2e60 100644 --- a/oidtree.h +++ b/oidtree.h @@ -35,16 +35,22 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid); /* Check whether the tree contains the given object ID. */ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); -/* Callback function used for `oidtree_each()`. */ -typedef enum cb_next (*oidtree_each_cb)(const struct object_id *oid, - void *cb_data); +/* + * Callback function used for `oidtree_each()`. Returning a non-zero exit code + * will cause iteration to stop. The exit code will be propagated to the caller + * of `oidtree_each()`. + */ +typedef int (*oidtree_each_cb)(const struct object_id *oid, + void *cb_data); /* * Iterate through all object IDs in the tree whose prefix matches the given * object ID prefix and invoke the callback function on each of them. + * + * Returns any non-zero exit code from the provided callback function. */ -void oidtree_each(struct oidtree *ot, - const struct object_id *prefix, size_t prefix_hex_len, - oidtree_each_cb cb, void *cb_data); +int oidtree_each(struct oidtree *ot, + const struct object_id *prefix, size_t prefix_hex_len, + oidtree_each_cb cb, void *cb_data); #endif /* OIDTREE_H */ diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index def47c6795..d4d05c7dc3 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -38,7 +38,7 @@ struct expected_hex_iter { const char *query; }; -static enum cb_next check_each_cb(const struct object_id *oid, void *data) +static int check_each_cb(const struct object_id *oid, void *data) { struct expected_hex_iter *hex_iter = data; struct object_id expected; @@ -49,7 +49,7 @@ static enum cb_next check_each_cb(const struct object_id *oid, void *data) &expected); cl_assert_equal_s(oid_to_hex(oid), oid_to_hex(&expected)); hex_iter->i += 1; - return CB_CONTINUE; + return 0; } LAST_ARG_MUST_BE_NULL From cfd575f0a9730712107e4ee6799a37665bcd8204 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:29 +0100 Subject: [PATCH 04/15] odb: introduce `struct odb_for_each_object_options` The `odb_for_each_object()` function only accepts a bitset of flags. In a subsequent commit we'll want to change object iteration to also support iterating over only those objects that have a specific prefix. While we could of course add the prefix to the function signature, or alternatively introduce a new function, both of these options don't really seem to be that sensible. Instead, introduce a new `struct odb_for_each_object_options` that can be passed to a new `odb_for_each_object_ext()` function. Splice through the options structure into the respective object database sources. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 7 +++++-- builtin/pack-objects.c | 12 +++++++----- commit-graph.c | 5 ++++- object-file.c | 9 +++++---- object-file.h | 2 +- odb.c | 38 +++++++++++++++++++++++++------------- odb.h | 16 ++++++++++++++++ odb/source-files.c | 8 ++++---- odb/source.h | 6 +++--- packfile.c | 12 ++++++------ packfile.h | 2 +- 11 files changed, 77 insertions(+), 40 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b6f12f41d6..cd13a3a89f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -848,6 +848,9 @@ static void batch_each_object(struct batch_options *opt, .callback = callback, .payload = _payload, }; + struct odb_for_each_object_options opts = { + .flags = flags, + }; struct bitmap_index *bitmap = NULL; struct odb_source *source; @@ -860,7 +863,7 @@ static void batch_each_object(struct batch_options *opt, odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) { int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, - &payload, flags); + &payload, &opts); if (ret) break; } @@ -884,7 +887,7 @@ static void batch_each_object(struct batch_options *opt, for (source = the_repository->objects->sources; source; source = source->next) { struct odb_source_files *files = odb_source_files_downcast(source); int ret = packfile_store_for_each_object(files->packed, &oi, - batch_one_object_oi, &payload, flags); + batch_one_object_oi, &payload, &opts); if (ret) break; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index cd013c0b68..3bb57ff183 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4344,6 +4344,12 @@ static void add_objects_in_unpacked_packs(void) { struct odb_source *source; time_t mtime; + struct odb_for_each_object_options opts = { + .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS, + }; struct object_info oi = { .mtimep = &mtime, }; @@ -4356,11 +4362,7 @@ static void add_objects_in_unpacked_packs(void) continue; if (packfile_store_for_each_object(files->packed, &oi, - add_object_in_unpacked_pack, NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + add_object_in_unpacked_pack, NULL, &opts)) die(_("cannot open pack index")); } } diff --git a/commit-graph.c b/commit-graph.c index c030003330..df4b4a125e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1969,6 +1969,9 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { struct odb_source *source; enum object_type type; + struct odb_for_each_object_options opts = { + .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER, + }; struct object_info oi = { .typep = &type, }; @@ -1983,7 +1986,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) for (source = ctx->r->objects->sources; source; source = source->next) { struct odb_source_files *files = odb_source_files_downcast(source); packfile_store_for_each_object(files->packed, &oi, add_packed_commits_oi, - ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + ctx, &opts); } if (ctx->progress_done < ctx->approx_nr_objects) diff --git a/object-file.c b/object-file.c index 9764c8dd06..56cbb27ab9 100644 --- a/object-file.c +++ b/object-file.c @@ -1849,7 +1849,7 @@ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct for_each_object_wrapper_data data = { .source = source, @@ -1859,9 +1859,9 @@ int odb_source_loose_for_each_object(struct odb_source *source, }; /* There are no loose promisor objects, so we can return immediately. */ - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) return 0; - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) return 0; return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, @@ -1914,9 +1914,10 @@ int odb_source_loose_count_objects(struct odb_source *source, *out = count * 256; ret = 0; } else { + struct odb_for_each_object_options opts = { 0 }; *out = 0; ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, NULL); + out, &opts); } out: diff --git a/object-file.h b/object-file.h index f8d8805a18..46dfa7b632 100644 --- a/object-file.h +++ b/object-file.h @@ -137,7 +137,7 @@ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* * Count the number of loose objects in this source. diff --git a/odb.c b/odb.c index 350e23f3c0..3019957b87 100644 --- a/odb.c +++ b/odb.c @@ -896,25 +896,37 @@ int odb_freshen_object(struct object_database *odb, return 0; } +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + if (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) + continue; + + ret = odb_source_for_each_object(source, request, cb, cb_data, opts); + if (ret) + return ret; + } + + return 0; +} + int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, unsigned flags) { - int ret; - - odb_prepare_alternates(odb); - for (struct odb_source *source = odb->sources; source; source = source->next) { - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) - continue; - - ret = odb_source_for_each_object(source, request, cb, cb_data, flags); - if (ret) - return ret; - } - - return 0; + struct odb_for_each_object_options opts = { + .flags = flags, + }; + return odb_for_each_object_ext(odb, request, cb, cb_data, &opts); } int odb_count_objects(struct object_database *odb, diff --git a/odb.h b/odb.h index 9aee260105..a19a8bb50d 100644 --- a/odb.h +++ b/odb.h @@ -481,6 +481,15 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct object_info *oi, void *cb_data); +/* + * Options that can be passed to `odb_for_each_object()` and its + * backend-specific implementations. + */ +struct odb_for_each_object_options { + /* A bitfield of `odb_for_each_object_flags`. */ + enum odb_for_each_object_flags flags; +}; + /* * Iterate through all objects contained in the object database. Note that * objects may be iterated over multiple times in case they are either stored @@ -495,6 +504,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, * Returns 0 on success, a negative error code in case a failure occurred, or * an arbitrary non-zero error code returned by the callback itself. */ +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts); + +/* Same as `odb_for_each_object_ext()` with `opts.flags` set to the given flags. */ int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, diff --git a/odb/source-files.c b/odb/source-files.c index c08d8993e3..e90bb689bb 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -75,18 +75,18 @@ static int odb_source_files_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct odb_source_files *files = odb_source_files_downcast(source); int ret; - if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { - ret = odb_source_loose_for_each_object(source, request, cb, cb_data, flags); + if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts); if (ret) return ret; } - ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, flags); + ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, opts); if (ret) return ret; diff --git a/odb/source.h b/odb/source.h index 96c906e7a1..ee5d6ed530 100644 --- a/odb/source.h +++ b/odb/source.h @@ -140,7 +140,7 @@ struct odb_source { const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* * This callback is expected to count objects in the given object @@ -343,9 +343,9 @@ static inline int odb_source_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { - return source->for_each_object(source, request, cb, cb_data, flags); + return source->for_each_object(source, request, cb, cb_data, opts); } /* diff --git a/packfile.c b/packfile.c index d4de9f3ffe..a6f3d2035d 100644 --- a/packfile.c +++ b/packfile.c @@ -2375,7 +2375,7 @@ int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct packfile_store_for_each_object_wrapper_data data = { .store = store, @@ -2391,15 +2391,15 @@ int packfile_store_for_each_object(struct packfile_store *store, for (e = packfile_store_get_packs(store); e; e = e->next) { struct packed_git *p = e->pack; - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { @@ -2408,7 +2408,7 @@ int packfile_store_for_each_object(struct packfile_store *store, } ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper, - &data, flags); + &data, opts->flags); if (ret) goto out; } diff --git a/packfile.h b/packfile.h index a16ec3950d..fa41dfda38 100644 --- a/packfile.h +++ b/packfile.h @@ -367,7 +367,7 @@ int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 From 284b7862be735bb47276ac288ace153ae3d06938 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:30 +0100 Subject: [PATCH 05/15] object-name: move logic to iterate through loose prefixed objects The logic to iterate through loose objects that have a certain prefix is currently hosted in "object-name.c". This logic reaches into specifics of the loose object source, so it breaks once a different backend is used for the object storage. Move the logic to iterate through loose objects with a prefix into "object-file.c". This is done by extending the for-each-object options to support an optional prefix that is then honored by the loose source. Naturally, we'll also have this support in the packfile store. This is done in the next commit. Furthermore, there are no users of the loose cache outside of "object-file.c" anymore. As such, convert `odb_source_loose_cache()` to have file scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 29 +++++++++++++++++++++++++++-- object-file.h | 7 ------- object-name.c | 10 ++++++---- odb.h | 7 +++++++ 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/object-file.c b/object-file.c index 56cbb27ab9..13732f324f 100644 --- a/object-file.c +++ b/object-file.c @@ -33,6 +33,9 @@ /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); + static int get_conv_flags(unsigned flags) { if (flags & INDEX_RENORMALIZE) @@ -1845,6 +1848,23 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } } +static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->request) { + struct object_info oi = *data->request; + + if (odb_source_loose_read_object_info(data->source, + oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, @@ -1864,6 +1884,11 @@ int odb_source_loose_for_each_object(struct odb_source *source, if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) return 0; + if (opts->prefix) + return oidtree_each(odb_source_loose_cache(source, opts->prefix), + opts->prefix, opts->prefix_hex_len, + for_each_prefixed_object_wrapper_cb, &data); + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, NULL, NULL, &data); } @@ -1935,8 +1960,8 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid) +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { struct odb_source_files *files = odb_source_files_downcast(source); int subdir_nr = oid->hash[0]; diff --git a/object-file.h b/object-file.h index 46dfa7b632..f11ad58f6c 100644 --- a/object-file.h +++ b/object-file.h @@ -74,13 +74,6 @@ int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); -/* - * Populate and return the loose object cache array corresponding to the - * given object ID. - */ -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid); - /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. diff --git a/object-name.c b/object-name.c index a24a1b48e1..929a68dbd0 100644 --- a/object-name.c +++ b/object-name.c @@ -16,7 +16,6 @@ #include "remote.h" #include "dir.h" #include "oid-array.h" -#include "oidtree.h" #include "packfile.h" #include "pretty.h" #include "object-file.h" @@ -103,7 +102,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object static int match_hash(unsigned, const unsigned char *, const unsigned char *); -static int match_prefix(const struct object_id *oid, void *arg) +static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) { struct disambiguate_state *ds = arg; /* no need to call match_hash, oidtree_each did prefix match */ @@ -113,11 +112,14 @@ static int match_prefix(const struct object_id *oid, void *arg) static void find_short_object_filename(struct disambiguate_state *ds) { + struct odb_for_each_object_options opts = { + .prefix = &ds->bin_pfx, + .prefix_hex_len = ds->len, + }; struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), - &ds->bin_pfx, ds->len, match_prefix, ds); + odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) diff --git a/odb.h b/odb.h index a19a8bb50d..e80fd8f7ab 100644 --- a/odb.h +++ b/odb.h @@ -488,6 +488,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct odb_for_each_object_options { /* A bitfield of `odb_for_each_object_flags`. */ enum odb_for_each_object_flags flags; + + /* + * If set, only iterate through objects whose first `prefix_hex_len` + * hex characters matches the given prefix. + */ + const struct object_id *prefix; + size_t prefix_hex_len; }; /* From e30bff8f8402f0f147a08fe00b75e24f293fa870 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:31 +0100 Subject: [PATCH 06/15] object-name: move logic to iterate through packed prefixed objects Similar to the preceding commit, move the logic to iterate through objects that have a given prefix into "packfile.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 94 ++------------------------- packfile.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 87 deletions(-) diff --git a/object-name.c b/object-name.c index 929a68dbd0..ff0de06ff9 100644 --- a/object-name.c +++ b/object-name.c @@ -100,8 +100,6 @@ static void update_candidates(struct disambiguate_state *ds, const struct object /* otherwise, current can be discarded and candidate is still good */ } -static int match_hash(unsigned, const unsigned char *, const unsigned char *); - static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) { struct disambiguate_state *ds = arg; @@ -122,103 +120,25 @@ static void find_short_object_filename(struct disambiguate_state *ds) odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } -static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) -{ - do { - if (*a != *b) - return 0; - a++; - b++; - len -= 2; - } while (len > 1); - if (len) - if ((*a ^ *b) & 0xf0) - return 0; - return 1; -} - -static void unique_in_midx(struct multi_pack_index *m, - struct disambiguate_state *ds) -{ - for (; m; m = m->base_midx) { - uint32_t num, i, first = 0; - const struct object_id *current = NULL; - int len = ds->len > ds->repo->hash_algo->hexsz ? - ds->repo->hash_algo->hexsz : ds->len; - - if (!m->num_objects) - continue; - - num = m->num_objects + m->num_objects_in_base; - - bsearch_one_midx(&ds->bin_pfx, m, &first); - - /* - * At this point, "first" is the location of the lowest - * object with an object name that could match - * "bin_pfx". See if we have 0, 1 or more objects that - * actually match(es). - */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - current = nth_midxed_object_oid(&oid, m, i); - if (!match_hash(len, ds->bin_pfx.hash, current->hash)) - break; - update_candidates(ds, current); - } - } -} - -static void unique_in_pack(struct packed_git *p, - struct disambiguate_state *ds) -{ - uint32_t num, i, first = 0; - int len = ds->len > ds->repo->hash_algo->hexsz ? - ds->repo->hash_algo->hexsz : ds->len; - - if (p->multi_pack_index) - return; - - if (open_pack_index(p) || !p->num_objects) - return; - - num = p->num_objects; - bsearch_pack(&ds->bin_pfx, p, &first); - - /* - * At this point, "first" is the location of the lowest object - * with an object name that could match "bin_pfx". See if we have - * 0, 1 or more objects that actually match(es). - */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - nth_packed_object_id(&oid, p, i); - if (!match_hash(len, ds->bin_pfx.hash, oid.hash)) - break; - update_candidates(ds, &oid); - } -} - static void find_short_packed_object(struct disambiguate_state *ds) { + struct odb_for_each_object_options opts = { + .prefix = &ds->bin_pfx, + .prefix_hex_len = ds->len, + }; struct odb_source *source; - struct packed_git *p; /* Skip, unless oids from the storage hash algorithm are wanted */ if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo)) return; odb_prepare_alternates(ds->repo->objects); - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - unique_in_midx(m, ds); - } + for (source = ds->repo->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); - repo_for_each_pack(ds->repo, p) { + packfile_store_for_each_object(files->packed, NULL, match_prefix, ds, &opts); if (ds->ambiguous) break; - unique_in_pack(p, ds); } } diff --git a/packfile.c b/packfile.c index a6f3d2035d..2539a371c1 100644 --- a/packfile.c +++ b/packfile.c @@ -2371,6 +2371,177 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid, } } +static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) +{ + do { + if (*a != *b) + return 0; + a++; + b++; + len -= 2; + } while (len > 1); + if (len) + if ((*a ^ *b) & 0xf0) + return 0; + return 1; +} + +static int for_each_prefixed_object_in_midx( + struct packfile_store *store, + struct multi_pack_index *m, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + int ret; + + for (; m; m = m->base_midx) { + uint32_t num, i, first = 0; + int len = opts->prefix_hex_len > m->source->odb->repo->hash_algo->hexsz ? + m->source->odb->repo->hash_algo->hexsz : opts->prefix_hex_len; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + + bsearch_one_midx(opts->prefix, m, &first); + + /* + * At this point, "first" is the location of the lowest + * object with an object name that could match "opts->prefix". + * See if we have 0, 1 or more objects that actually match(es). + */ + for (i = first; i < num; i++) { + const struct object_id *current = NULL; + struct object_id oid; + + current = nth_midxed_object_oid(&oid, m, i); + + if (!match_hash(len, opts->prefix->hash, current->hash)) + break; + + if (data->request) { + struct object_info oi = *data->request; + + ret = packfile_store_read_object_info(store, current, + &oi, 0); + if (ret) + goto out; + + ret = data->cb(&oid, &oi, data->cb_data); + if (ret) + goto out; + } else { + ret = data->cb(&oid, NULL, data->cb_data); + if (ret) + goto out; + } + } + } + + ret = 0; + +out: + return ret; +} + +static int for_each_prefixed_object_in_pack( + struct packfile_store *store, + struct packed_git *p, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + uint32_t num, i, first = 0; + int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ? + p->repo->hash_algo->hexsz : opts->prefix_hex_len; + int ret; + + num = p->num_objects; + bsearch_pack(opts->prefix, p, &first); + + /* + * At this point, "first" is the location of the lowest object + * with an object name that could match "bin_pfx". See if we have + * 0, 1 or more objects that actually match(es). + */ + for (i = first; i < num; i++) { + struct object_id oid; + + nth_packed_object_id(&oid, p, i); + if (!match_hash(len, opts->prefix->hash, oid.hash)) + break; + + if (data->request) { + struct object_info oi = *data->request; + + ret = packfile_store_read_object_info(store, &oid, &oi, 0); + if (ret) + goto out; + + ret = data->cb(&oid, &oi, data->cb_data); + if (ret) + goto out; + } else { + ret = data->cb(&oid, NULL, data->cb_data); + if (ret) + goto out; + } + } + + ret = 0; + +out: + return ret; +} + +static int packfile_store_for_each_prefixed_object( + struct packfile_store *store, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + bool pack_errors = false; + int ret; + + if (opts->flags) + BUG("flags unsupported"); + + store->skip_mru_updates = true; + + m = get_multi_pack_index(store->source); + if (m) { + ret = for_each_prefixed_object_in_midx(store, m, opts, data); + if (ret) + goto out; + } + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + + if (open_pack_index(e->pack)) { + pack_errors = true; + continue; + } + + if (!e->pack->num_objects) + continue; + + ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data); + if (ret) + goto out; + } + + ret = 0; + +out: + store->skip_mru_updates = false; + if (!ret && pack_errors) + ret = -1; + return ret; +} + int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, @@ -2386,6 +2557,9 @@ int packfile_store_for_each_object(struct packfile_store *store, struct packfile_list_entry *e; int pack_errors = 0, ret; + if (opts->prefix) + return packfile_store_for_each_prefixed_object(store, opts, &data); + store->skip_mru_updates = true; for (e = packfile_store_get_packs(store); e; e = e->next) { From 28c9254e3b3e1304b5a6c36146cf6fec29f3f5d3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:32 +0100 Subject: [PATCH 07/15] object-name: extract function to parse object ID prefixes Extract the logic that parses an object ID prefix into a new function. This function will be used by a second callsite in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 60 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/object-name.c b/object-name.c index ff0de06ff9..fd1b010ab3 100644 --- a/object-name.c +++ b/object-name.c @@ -270,41 +270,57 @@ int set_disambiguate_hint_config(const char *var, const char *value) return error("unknown hint type for '%s': %s", var, value); } +static int parse_oid_prefix(const char *name, int len, + const struct git_hash_algo *algo, + char *hex_out, + struct object_id *oid_out) +{ + for (int i = 0; i < len; i++) { + unsigned char c = name[i]; + unsigned char val; + if (c >= '0' && c <= '9') { + val = c - '0'; + } else if (c >= 'a' && c <= 'f') { + val = c - 'a' + 10; + } else if (c >= 'A' && c <='F') { + val = c - 'A' + 10; + c -= 'A' - 'a'; + } else { + return -1; + } + + if (hex_out) + hex_out[i] = c; + if (oid_out) { + if (!(i & 1)) + val <<= 4; + oid_out->hash[i >> 1] |= val; + } + } + + if (hex_out) + hex_out[len] = '\0'; + if (oid_out) + oid_out->algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN; + + return 0; +} + static int init_object_disambiguation(struct repository *r, const char *name, int len, const struct git_hash_algo *algo, struct disambiguate_state *ds) { - int i; - if (len < MINIMUM_ABBREV || len > GIT_MAX_HEXSZ) return -1; memset(ds, 0, sizeof(*ds)); - for (i = 0; i < len ;i++) { - unsigned char c = name[i]; - unsigned char val; - if (c >= '0' && c <= '9') - val = c - '0'; - else if (c >= 'a' && c <= 'f') - val = c - 'a' + 10; - else if (c >= 'A' && c <='F') { - val = c - 'A' + 10; - c -= 'A' - 'a'; - } - else - return -1; - ds->hex_pfx[i] = c; - if (!(i & 1)) - val <<= 4; - ds->bin_pfx.hash[i >> 1] |= val; - } + if (parse_oid_prefix(name, len, algo, ds->hex_pfx, &ds->bin_pfx) < 0) + return -1; ds->len = len; - ds->hex_pfx[len] = '\0'; ds->repo = r; - ds->bin_pfx.algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN; odb_prepare_alternates(r->objects); return 0; } From d2612fe59e605102cb422fadbb0cb8bea499daee Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:33 +0100 Subject: [PATCH 08/15] object-name: backend-generic `repo_collect_ambiguous()` The function `repo_collect_ambiguous()` is responsible for collecting objects whose IDs match a specific prefix. The information is then used to inform the user about which objects they could have meant in case a short object ID is ambiguous. The logic to do this uses the object disambiguation infrastructure and calls into backend-specific functions to iterate through loose and packed objects. This isn't really required anymore though: all we want to do is to enumerate objects that have such a prefix and then append those objects to a `struct oid_array`. This can be trivially achieved in a generic way now that `odb_for_each_object()` has learned to yield only objects that match such a prefix. Refactor the code to use the backend-generic infrastructure instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/object-name.c b/object-name.c index fd1b010ab3..4c3ace150e 100644 --- a/object-name.c +++ b/object-name.c @@ -448,8 +448,8 @@ static int collect_ambiguous(const struct object_id *oid, void *data) return 0; } -static int repo_collect_ambiguous(struct repository *r UNUSED, - const struct object_id *oid, +static int repo_collect_ambiguous(const struct object_id *oid, + struct object_info *oi UNUSED, void *data) { return collect_ambiguous(oid, data); @@ -586,18 +586,19 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix, const struct git_hash_algo *algo, each_abbrev_fn fn, void *cb_data) { + struct object_id prefix_oid = { 0 }; + struct odb_for_each_object_options opts = { + .prefix = &prefix_oid, + .prefix_hex_len = strlen(prefix), + }; struct oid_array collect = OID_ARRAY_INIT; - struct disambiguate_state ds; int ret; - if (init_object_disambiguation(r, prefix, strlen(prefix), algo, &ds) < 0) + if (parse_oid_prefix(prefix, opts.prefix_hex_len, algo, NULL, &prefix_oid) < 0) return -1; - ds.always_call_fn = 1; - ds.fn = repo_collect_ambiguous; - ds.cb_data = &collect; - find_short_object_filename(&ds); - find_short_packed_object(&ds); + if (odb_for_each_object_ext(r->objects, NULL, repo_collect_ambiguous, &collect, &opts) < 0) + return -1; ret = oid_array_for_each_unique(&collect, fn, cb_data); oid_array_clear(&collect); From eac58debd9f61391a61b832e3cee349a20bd2c4a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:34 +0100 Subject: [PATCH 09/15] object-name: backend-generic `get_short_oid()` The function `get_short_oid()` takes as input an abbreviated object ID and tries to turn that object ID into the full object ID. This is done by iterating through all objects that have the user-provided prefix. If that yields exactly one object we know that the abbreviated object ID is unambiguous, otherwise it is ambiguous and we print the list of objects that match the prefix. We iterate through all objects with the given prefix by calling both `find_short_packed_object()` and `find_short_object_filename()`, which is of course specific to the "files" backend. But we now have a generic way to iterate through objects with a specific prefix. Refactor the code to use `odb_for_each_object()` instead so that it works with object backends different than the "files" backend. Remove the now-unused `find_short_packed_object()` function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/object-name.c b/object-name.c index 4c3ace150e..7a224ab4af 100644 --- a/object-name.c +++ b/object-name.c @@ -120,28 +120,6 @@ static void find_short_object_filename(struct disambiguate_state *ds) odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } -static void find_short_packed_object(struct disambiguate_state *ds) -{ - struct odb_for_each_object_options opts = { - .prefix = &ds->bin_pfx, - .prefix_hex_len = ds->len, - }; - struct odb_source *source; - - /* Skip, unless oids from the storage hash algorithm are wanted */ - if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo)) - return; - - odb_prepare_alternates(ds->repo->objects); - for (source = ds->repo->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - - packfile_store_for_each_object(files->packed, NULL, match_prefix, ds, &opts); - if (ds->ambiguous) - break; - } -} - static int finish_object_disambiguation(struct disambiguate_state *ds, struct object_id *oid) { @@ -499,6 +477,7 @@ static enum get_oid_result get_short_oid(struct repository *r, struct object_id *oid, unsigned flags) { + struct odb_for_each_object_options opts = { 0 }; int status; struct disambiguate_state ds; int quietly = !!(flags & GET_OID_QUIETLY); @@ -526,8 +505,10 @@ static enum get_oid_result get_short_oid(struct repository *r, else ds.fn = default_disambiguate_hint; - find_short_object_filename(&ds); - find_short_packed_object(&ds); + opts.prefix = &ds.bin_pfx; + opts.prefix_hex_len = ds.len; + + odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); status = finish_object_disambiguation(&ds, oid); /* @@ -537,8 +518,7 @@ static enum get_oid_result get_short_oid(struct repository *r, */ if (status == MISSING_OBJECT) { odb_reprepare(r->objects); - find_short_object_filename(&ds); - find_short_packed_object(&ds); + odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); status = finish_object_disambiguation(&ds, oid); } From e9b7caa1b14bc1fe825b216941a0655d6afdffe5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:35 +0100 Subject: [PATCH 10/15] object-name: merge `update_candidates()` and `match_prefix()` There's only a single callsite for `match_prefix()`, and that function is a rather trivial wrapper of `update_candidates()`. Merge these two functions into a single `update_disambiguate_state()` function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/object-name.c b/object-name.c index 7a224ab4af..f55a332032 100644 --- a/object-name.c +++ b/object-name.c @@ -51,27 +51,31 @@ struct disambiguate_state { unsigned always_call_fn:1; }; -static void update_candidates(struct disambiguate_state *ds, const struct object_id *current) +static int update_disambiguate_state(const struct object_id *current, + struct object_info *oi UNUSED, + void *cb_data) { + struct disambiguate_state *ds = cb_data; + /* The hash algorithm of current has already been filtered */ if (ds->always_call_fn) { ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 1 : 0; - return; + return ds->ambiguous; } if (!ds->candidate_exists) { /* this is the first candidate */ oidcpy(&ds->candidate, current); ds->candidate_exists = 1; - return; + return 0; } else if (oideq(&ds->candidate, current)) { /* the same as what we already have seen */ - return; + return 0; } if (!ds->fn) { /* cannot disambiguate between ds->candidate and current */ ds->ambiguous = 1; - return; + return ds->ambiguous; } if (!ds->candidate_checked) { @@ -84,7 +88,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object /* discard the candidate; we know it does not satisfy fn */ oidcpy(&ds->candidate, current); ds->candidate_checked = 0; - return; + return 0; } /* if we reach this point, we know ds->candidate satisfies fn */ @@ -95,17 +99,12 @@ static void update_candidates(struct disambiguate_state *ds, const struct object */ ds->candidate_ok = 0; ds->ambiguous = 1; + return ds->ambiguous; } /* otherwise, current can be discarded and candidate is still good */ -} -static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) -{ - struct disambiguate_state *ds = arg; - /* no need to call match_hash, oidtree_each did prefix match */ - update_candidates(ds, oid); - return ds->ambiguous; + return 0; } static void find_short_object_filename(struct disambiguate_state *ds) @@ -117,7 +116,8 @@ static void find_short_object_filename(struct disambiguate_state *ds) struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); + odb_source_loose_for_each_object(source, NULL, update_disambiguate_state, + ds, &opts); } static int finish_object_disambiguation(struct disambiguate_state *ds, @@ -508,7 +508,8 @@ static enum get_oid_result get_short_oid(struct repository *r, opts.prefix = &ds.bin_pfx; opts.prefix_hex_len = ds.len; - odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); + odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state, + &ds, &opts); status = finish_object_disambiguation(&ds, oid); /* @@ -518,7 +519,8 @@ static enum get_oid_result get_short_oid(struct repository *r, */ if (status == MISSING_OBJECT) { odb_reprepare(r->objects); - odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); + odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state, + &ds, &opts); status = finish_object_disambiguation(&ds, oid); } From 67f47eab61c3a2c14f2d0351c3844f12fbd95dd2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:36 +0100 Subject: [PATCH 11/15] object-name: abbreviate loose object names without `disambiguate_state` The function `find_short_object_filename()` takes an object ID and computes the minimum required object name length to make it unique. This is done by reusing the object disambiguation infrastructure, where we iterate through every loose object and then update the disambiguate state one by one. Ultimately, we don't care about the disambiguate state though. It is used because this infrastructure knows how to enumerate only those objects that match a given prefix. But now that we have extended the `odb_for_each_object()` function to do this for us we have an easier way to do this. Consequently, we really only use the disambiguate state now to propagate `struct min_abbrev_data`. Refactor the code and drop this indirection so that we use `struct min_abbrev_data` directly. This also allows us to drop some now-unused logic from the disambiguate infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 54 +++++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/object-name.c b/object-name.c index f55a332032..d82fb49f39 100644 --- a/object-name.c +++ b/object-name.c @@ -48,7 +48,6 @@ struct disambiguate_state { unsigned candidate_ok:1; unsigned disambiguate_fn_used:1; unsigned ambiguous:1; - unsigned always_call_fn:1; }; static int update_disambiguate_state(const struct object_id *current, @@ -58,10 +57,6 @@ static int update_disambiguate_state(const struct object_id *current, struct disambiguate_state *ds = cb_data; /* The hash algorithm of current has already been filtered */ - if (ds->always_call_fn) { - ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 1 : 0; - return ds->ambiguous; - } if (!ds->candidate_exists) { /* this is the first candidate */ oidcpy(&ds->candidate, current); @@ -107,19 +102,6 @@ static int update_disambiguate_state(const struct object_id *current, return 0; } -static void find_short_object_filename(struct disambiguate_state *ds) -{ - struct odb_for_each_object_options opts = { - .prefix = &ds->bin_pfx, - .prefix_hex_len = ds->len, - }; - struct odb_source *source; - - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - odb_source_loose_for_each_object(source, NULL, update_disambiguate_state, - ds, &opts); -} - static int finish_object_disambiguation(struct disambiguate_state *ds, struct object_id *oid) { @@ -632,11 +614,26 @@ static int extend_abbrev_len(const struct object_id *oid, return 0; } -static int repo_extend_abbrev_len(struct repository *r UNUSED, - const struct object_id *oid, - void *cb_data) +static int extend_abbrev_len_loose(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) { - return extend_abbrev_len(oid, cb_data); + struct min_abbrev_data *data = cb_data; + extend_abbrev_len(oid, data); + return 0; +} + +static void find_abbrev_len_loose(struct min_abbrev_data *mad) +{ + struct odb_for_each_object_options opts = { + .prefix = mad->oid, + .prefix_hex_len = mad->cur_len, + }; + struct odb_source *source; + + for (source = mad->repo->objects->sources; source; source = source->next) + odb_source_loose_for_each_object(source, NULL, extend_abbrev_len_loose, + mad, &opts); } static void find_abbrev_len_for_midx(struct multi_pack_index *m, @@ -752,9 +749,7 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - struct disambiguate_state ds; struct min_abbrev_data mad; - struct object_id oid_ret; const unsigned hexsz = algo->hexsz; if (len < 0) { @@ -794,16 +789,7 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.oid = oid; find_abbrev_len_packed(&mad); - - if (init_object_disambiguation(r, hex, mad.cur_len, algo, &ds) < 0) - return -1; - - ds.fn = repo_extend_abbrev_len; - ds.always_call_fn = 1; - ds.cb_data = (void *)&mad; - - find_short_object_filename(&ds); - (void)finish_object_disambiguation(&ds, &oid_ret); + find_abbrev_len_loose(&mad); hex[mad.cur_len] = 0; return mad.cur_len; From 1a2842d1b1e91d5e068d231cf4df4e783bdf9205 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:37 +0100 Subject: [PATCH 12/15] object-name: simplify computing common prefixes The function `extend_abbrev_len()` computes the length of common hex characters between two object IDs. This is done by: - Making the caller provide the `hex` string for the needle object ID. - Comparing every hex position of the haystack object ID with `get_hex_char_from_oid()`. Turning the binary representation into hex first is roundabout though: we can simply compare the binary representation and give some special attention to the final nibble. Introduce a new function `oid_common_prefix_hexlen()` that does exactly this and refactor the code to use the new function. This allows us to drop the `struct min_abbrev_data::hex` field. Furthermore, this function will be used in by some other callsites in subsequent commits. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- hash.c | 18 ++++++++++++++++++ hash.h | 3 +++ object-name.c | 23 +++-------------------- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/hash.c b/hash.c index 553f2008ea..e925b9754e 100644 --- a/hash.c +++ b/hash.c @@ -317,3 +317,21 @@ const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) /* Otherwise use the default one. */ return algop; } + +unsigned oid_common_prefix_hexlen(const struct object_id *a, + const struct object_id *b) +{ + unsigned rawsz = hash_algos[a->algo].rawsz; + + for (unsigned i = 0; i < rawsz; i++) { + if (a->hash[i] == b->hash[i]) + continue; + + if ((a->hash[i] ^ b->hash[i]) & 0xf0) + return i * 2; + else + return i * 2 + 1; + } + + return rawsz * 2; +} diff --git a/hash.h b/hash.h index d51efce1d3..c082a53c9a 100644 --- a/hash.h +++ b/hash.h @@ -396,6 +396,9 @@ static inline int oideq(const struct object_id *oid1, const struct object_id *oi return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ); } +unsigned oid_common_prefix_hexlen(const struct object_id *a, + const struct object_id *b); + static inline void oidcpy(struct object_id *dst, const struct object_id *src) { memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ); diff --git a/object-name.c b/object-name.c index d82fb49f39..32e9c23e40 100644 --- a/object-name.c +++ b/object-name.c @@ -585,32 +585,16 @@ static unsigned msb(unsigned long val) struct min_abbrev_data { unsigned int init_len; unsigned int cur_len; - char *hex; struct repository *repo; const struct object_id *oid; }; -static inline char get_hex_char_from_oid(const struct object_id *oid, - unsigned int pos) -{ - static const char hex[] = "0123456789abcdef"; - - if ((pos & 1) == 0) - return hex[oid->hash[pos >> 1] >> 4]; - else - return hex[oid->hash[pos >> 1] & 0xf]; -} - static int extend_abbrev_len(const struct object_id *oid, struct min_abbrev_data *mad) { - unsigned int i = mad->init_len; - while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i)) - i++; - - if (mad->hex[i] && i >= mad->cur_len) - mad->cur_len = i + 1; - + unsigned len = oid_common_prefix_hexlen(oid, mad->oid); + if (len != hash_algos[oid->algo].hexsz && len >= mad->cur_len) + mad->cur_len = len + 1; return 0; } @@ -785,7 +769,6 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.repo = r; mad.init_len = len; mad.cur_len = len; - mad.hex = hex; mad.oid = oid; find_abbrev_len_packed(&mad); From ab3ab1038dd38d2be62e3bacf39a3248929a7a98 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:38 +0100 Subject: [PATCH 13/15] object-name: move logic to compute loose abbreviation length The function `repo_find_unique_abbrev_r()` takes as input an object ID as well as a minimum object ID length and returns the minimum required prefix to make the object ID unique. The logic that computes the abbreviation length for loose objects is deeply tied to the loose object storage format. As such, it would fail in case a different object storage format was used. Prepare for making this logic generic to the backend by moving the logic into a new `odb_source_loose_find_abbrev_len()` function that is part of "object-file.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 38 ++++++++++++++++++++++++++++++++++++++ object-file.h | 12 ++++++++++++ object-name.c | 27 ++++----------------------- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/object-file.c b/object-file.c index 13732f324f..4f77ce0982 100644 --- a/object-file.c +++ b/object-file.c @@ -1952,6 +1952,44 @@ out: return ret; } +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index f11ad58f6c..3686f182e4 100644 --- a/object-file.h +++ b/object-file.h @@ -146,6 +146,18 @@ int odb_source_loose_count_objects(struct odb_source *source, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Find the shortest unique prefix for the given object ID, where `min_len` is + * the minimum length that the prefix should have. + * + * Returns 0 on success, in which case the computed length will be written to + * `out`. Otherwise, a negative error code is returned. + */ +int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object diff --git a/object-name.c b/object-name.c index 32e9c23e40..4e21dbfa97 100644 --- a/object-name.c +++ b/object-name.c @@ -598,28 +598,6 @@ static int extend_abbrev_len(const struct object_id *oid, return 0; } -static int extend_abbrev_len_loose(const struct object_id *oid, - struct object_info *oi UNUSED, - void *cb_data) -{ - struct min_abbrev_data *data = cb_data; - extend_abbrev_len(oid, data); - return 0; -} - -static void find_abbrev_len_loose(struct min_abbrev_data *mad) -{ - struct odb_for_each_object_options opts = { - .prefix = mad->oid, - .prefix_hex_len = mad->cur_len, - }; - struct odb_source *source; - - for (source = mad->repo->objects->sources; source; source = source->next) - odb_source_loose_for_each_object(source, NULL, extend_abbrev_len_loose, - mad, &opts); -} - static void find_abbrev_len_for_midx(struct multi_pack_index *m, struct min_abbrev_data *mad) { @@ -772,7 +750,10 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.oid = oid; find_abbrev_len_packed(&mad); - find_abbrev_len_loose(&mad); + + odb_prepare_alternates(r->objects); + for (struct odb_source *s = r->objects->sources; s; s = s->next) + odb_source_loose_find_abbrev_len(s, mad.oid, mad.cur_len, &mad.cur_len); hex[mad.cur_len] = 0; return mad.cur_len; From 6c2ede6e4abed754bb5891c2904212c05efcfb11 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:39 +0100 Subject: [PATCH 14/15] object-file: move logic to compute packed abbreviation length Same as the preceding commit, move the logic that computes the minimum required prefix length to make a given object ID unique for the packfile store into a new function `packfile_store_find_abbrev_len()` that is part of "packfile.c". This prepares for making the logic fully generic via pluggable object databases. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 135 +++++--------------------------------------------- packfile.c | 111 +++++++++++++++++++++++++++++++++++++++++ packfile.h | 5 ++ 3 files changed, 128 insertions(+), 123 deletions(-) diff --git a/object-name.c b/object-name.c index 4e21dbfa97..bb2294a193 100644 --- a/object-name.c +++ b/object-name.c @@ -582,115 +582,6 @@ static unsigned msb(unsigned long val) return r; } -struct min_abbrev_data { - unsigned int init_len; - unsigned int cur_len; - struct repository *repo; - const struct object_id *oid; -}; - -static int extend_abbrev_len(const struct object_id *oid, - struct min_abbrev_data *mad) -{ - unsigned len = oid_common_prefix_hexlen(oid, mad->oid); - if (len != hash_algos[oid->algo].hexsz && len >= mad->cur_len) - mad->cur_len = len + 1; - return 0; -} - -static void find_abbrev_len_for_midx(struct multi_pack_index *m, - struct min_abbrev_data *mad) -{ - for (; m; m = m->base_midx) { - int match = 0; - uint32_t num, first = 0; - struct object_id oid; - const struct object_id *mad_oid; - - if (!m->num_objects) - continue; - - num = m->num_objects + m->num_objects_in_base; - mad_oid = mad->oid; - match = bsearch_one_midx(mad_oid, m, &first); - - /* - * first is now the position in the packfile where we - * would insert mad->hash if it does not exist (or the - * position of mad->hash if it does exist). Hence, we - * consider a maximum of two objects nearby for the - * abbreviation length. - */ - mad->init_len = 0; - if (!match) { - if (nth_midxed_object_oid(&oid, m, first)) - extend_abbrev_len(&oid, mad); - } else if (first < num - 1) { - if (nth_midxed_object_oid(&oid, m, first + 1)) - extend_abbrev_len(&oid, mad); - } - if (first > 0) { - if (nth_midxed_object_oid(&oid, m, first - 1)) - extend_abbrev_len(&oid, mad); - } - mad->init_len = mad->cur_len; - } -} - -static void find_abbrev_len_for_pack(struct packed_git *p, - struct min_abbrev_data *mad) -{ - int match = 0; - uint32_t num, first = 0; - struct object_id oid; - const struct object_id *mad_oid; - - if (p->multi_pack_index) - return; - - if (open_pack_index(p) || !p->num_objects) - return; - - num = p->num_objects; - mad_oid = mad->oid; - match = bsearch_pack(mad_oid, p, &first); - - /* - * first is now the position in the packfile where we would insert - * mad->hash if it does not exist (or the position of mad->hash if - * it does exist). Hence, we consider a maximum of two objects - * nearby for the abbreviation length. - */ - mad->init_len = 0; - if (!match) { - if (!nth_packed_object_id(&oid, p, first)) - extend_abbrev_len(&oid, mad); - } else if (first < num - 1) { - if (!nth_packed_object_id(&oid, p, first + 1)) - extend_abbrev_len(&oid, mad); - } - if (first > 0) { - if (!nth_packed_object_id(&oid, p, first - 1)) - extend_abbrev_len(&oid, mad); - } - mad->init_len = mad->cur_len; -} - -static void find_abbrev_len_packed(struct min_abbrev_data *mad) -{ - struct packed_git *p; - - odb_prepare_alternates(mad->repo->objects); - for (struct odb_source *source = mad->repo->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - find_abbrev_len_for_midx(m, mad); - } - - repo_for_each_pack(mad->repo, p) - find_abbrev_len_for_pack(p, mad); -} - void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, const struct object_id *oid, int abbrev_len) { @@ -707,14 +598,14 @@ void strbuf_add_unique_abbrev(struct strbuf *sb, const struct object_id *oid, } int repo_find_unique_abbrev_r(struct repository *r, char *hex, - const struct object_id *oid, int len) + const struct object_id *oid, int min_len) { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - struct min_abbrev_data mad; const unsigned hexsz = algo->hexsz; + unsigned len; - if (len < 0) { + if (min_len < 0) { unsigned long count; if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) @@ -738,25 +629,23 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, */ if (len < FALLBACK_DEFAULT_ABBREV) len = FALLBACK_DEFAULT_ABBREV; + } else { + len = min_len; } oid_to_hex_r(hex, oid); if (len >= hexsz || !len) return hexsz; - mad.repo = r; - mad.init_len = len; - mad.cur_len = len; - mad.oid = oid; - - find_abbrev_len_packed(&mad); - odb_prepare_alternates(r->objects); - for (struct odb_source *s = r->objects->sources; s; s = s->next) - odb_source_loose_find_abbrev_len(s, mad.oid, mad.cur_len, &mad.cur_len); + for (struct odb_source *s = r->objects->sources; s; s = s->next) { + struct odb_source_files *files = odb_source_files_downcast(s); + packfile_store_find_abbrev_len(files->packed, oid, len, &len); + odb_source_loose_find_abbrev_len(s, oid, len, &len); + } - hex[mad.cur_len] = 0; - return mad.cur_len; + hex[len] = 0; + return len; } const char *repo_find_unique_abbrev(struct repository *r, diff --git a/packfile.c b/packfile.c index 2539a371c1..ee9c7ea1d1 100644 --- a/packfile.c +++ b/packfile.c @@ -2597,6 +2597,117 @@ out: return ret; } +static int extend_abbrev_len(const struct object_id *a, + const struct object_id *b, + unsigned *out) +{ + unsigned len = oid_common_prefix_hexlen(a, b); + if (len != hash_algos[a->algo].hexsz && len >= *out) + *out = len + 1; + return 0; +} + +static void find_abbrev_len_for_midx(struct multi_pack_index *m, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + unsigned len = min_len; + + for (; m; m = m->base_midx) { + int match = 0; + uint32_t num, first = 0; + struct object_id found_oid; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + match = bsearch_one_midx(oid, m, &first); + + /* + * first is now the position in the packfile where we + * would insert the object ID if it does not exist (or the + * position of the object ID if it does exist). Hence, we + * consider a maximum of two objects nearby for the + * abbreviation length. + */ + + if (!match) { + if (nth_midxed_object_oid(&found_oid, m, first)) + extend_abbrev_len(&found_oid, oid, &len); + } else if (first < num - 1) { + if (nth_midxed_object_oid(&found_oid, m, first + 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + if (first > 0) { + if (nth_midxed_object_oid(&found_oid, m, first - 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + } + + *out = len; +} + +static void find_abbrev_len_for_pack(struct packed_git *p, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + int match; + uint32_t num, first = 0; + struct object_id found_oid; + unsigned len = min_len; + + num = p->num_objects; + match = bsearch_pack(oid, p, &first); + + /* + * first is now the position in the packfile where we would insert + * the object ID if it does not exist (or the position of mad->hash if + * it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length. + */ + if (!match) { + if (!nth_packed_object_id(&found_oid, p, first)) + extend_abbrev_len(&found_oid, oid, &len); + } else if (first < num - 1) { + if (!nth_packed_object_id(&found_oid, p, first + 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + if (first > 0) { + if (!nth_packed_object_id(&found_oid, p, first - 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + + *out = len; +} + +int packfile_store_find_abbrev_len(struct packfile_store *store, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + + m = get_multi_pack_index(store->source); + if (m) + find_abbrev_len_for_midx(m, oid, min_len, &min_len); + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + if (open_pack_index(e->pack) || !e->pack->num_objects) + continue; + + find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len); + } + + *out = min_len; + return 0; +} + struct add_promisor_object_data { struct repository *repo; struct oidset *set; diff --git a/packfile.h b/packfile.h index fa41dfda38..45b35973f0 100644 --- a/packfile.h +++ b/packfile.h @@ -369,6 +369,11 @@ int packfile_store_for_each_object(struct packfile_store *store, void *cb_data, const struct odb_for_each_object_options *opts); +int packfile_store_find_abbrev_len(struct packfile_store *store, + const struct object_id *oid, + unsigned min_len, + unsigned *out); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 From 83869e15fa9ef3b0ea2adbfe2fe68a309f95b856 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:40 +0100 Subject: [PATCH 15/15] odb: introduce generic `odb_find_abbrev_len()` Introduce a new generic `odb_find_abbrev_len()` function as well as source-specific callback functions. This makes the logic to compute the required prefix length to make a given object unique fully pluggable. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 57 +++--------------------------------- odb.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ odb.h | 16 ++++++++++ odb/source-files.c | 25 ++++++++++++++++ odb/source.h | 24 +++++++++++++++ 5 files changed, 142 insertions(+), 53 deletions(-) diff --git a/object-name.c b/object-name.c index bb2294a193..f6e1f29e1f 100644 --- a/object-name.c +++ b/object-name.c @@ -15,10 +15,9 @@ #include "refs.h" #include "remote.h" #include "dir.h" +#include "odb.h" #include "oid-array.h" -#include "packfile.h" #include "pretty.h" -#include "object-file.h" #include "read-cache-ll.h" #include "repo-settings.h" #include "repository.h" @@ -569,19 +568,6 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix, return ret; } -/* - * Return the slot of the most-significant bit set in "val". There are various - * ways to do this quickly with fls() or __builtin_clzl(), but speed is - * probably not a big deal here. - */ -static unsigned msb(unsigned long val) -{ - unsigned r = 0; - while (val >>= 1) - r++; - return r; -} - void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, const struct object_id *oid, int abbrev_len) { @@ -602,49 +588,14 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - const unsigned hexsz = algo->hexsz; unsigned len; - if (min_len < 0) { - unsigned long count; - - if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) - count = 0; - - /* - * Add one because the MSB only tells us the highest bit set, - * not including the value of all the _other_ bits (so "15" - * is only one off of 2^4, but the MSB is the 3rd bit. - */ - len = msb(count) + 1; - /* - * We now know we have on the order of 2^len objects, which - * expects a collision at 2^(len/2). But we also care about hex - * chars, not bits, and there are 4 bits per hex. So all - * together we need to divide by 2 and round up. - */ - len = DIV_ROUND_UP(len, 2); - /* - * For very small repos, we stick with our regular fallback. - */ - if (len < FALLBACK_DEFAULT_ABBREV) - len = FALLBACK_DEFAULT_ABBREV; - } else { - len = min_len; - } + if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0) + len = algo->hexsz; oid_to_hex_r(hex, oid); - if (len >= hexsz || !len) - return hexsz; - - odb_prepare_alternates(r->objects); - for (struct odb_source *s = r->objects->sources; s; s = s->next) { - struct odb_source_files *files = odb_source_files_downcast(s); - packfile_store_find_abbrev_len(files->packed, oid, len, &len); - odb_source_loose_find_abbrev_len(s, oid, len, &len); - } - hex[len] = 0; + return len; } diff --git a/odb.c b/odb.c index 3019957b87..3f94a53df1 100644 --- a/odb.c +++ b/odb.c @@ -12,6 +12,7 @@ #include "midx.h" #include "object-file-convert.h" #include "object-file.h" +#include "object-name.h" #include "odb.h" #include "packfile.h" #include "path.h" @@ -964,6 +965,78 @@ out: return ret; } +/* + * Return the slot of the most-significant bit set in "val". There are various + * ways to do this quickly with fls() or __builtin_clzl(), but speed is + * probably not a big deal here. + */ +static unsigned msb(unsigned long val) +{ + unsigned r = 0; + while (val >>= 1) + r++; + return r; +} + +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_length, + unsigned *out) +{ + const struct git_hash_algo *algo = + oid->algo ? &hash_algos[oid->algo] : odb->repo->hash_algo; + const unsigned hexsz = algo->hexsz; + unsigned len; + int ret; + + if (min_length < 0) { + unsigned long count; + + if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) + count = 0; + + /* + * Add one because the MSB only tells us the highest bit set, + * not including the value of all the _other_ bits (so "15" + * is only one off of 2^4, but the MSB is the 3rd bit. + */ + len = msb(count) + 1; + /* + * We now know we have on the order of 2^len objects, which + * expects a collision at 2^(len/2). But we also care about hex + * chars, not bits, and there are 4 bits per hex. So all + * together we need to divide by 2 and round up. + */ + len = DIV_ROUND_UP(len, 2); + /* + * For very small repos, we stick with our regular fallback. + */ + if (len < FALLBACK_DEFAULT_ABBREV) + len = FALLBACK_DEFAULT_ABBREV; + } else { + len = min_length; + } + + if (len >= hexsz || !len) { + *out = hexsz; + ret = 0; + goto out; + } + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + ret = odb_source_find_abbrev_len(source, oid, len, &len); + if (ret) + goto out; + } + + ret = 0; + *out = len; + +out: + return ret; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index e80fd8f7ab..984bafca9d 100644 --- a/odb.h +++ b/odb.h @@ -545,6 +545,22 @@ int odb_count_objects(struct object_database *odb, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Given an object ID, find the minimum required length required to make the + * object ID unique across the whole object database. + * + * The `min_len` determines the minimum abbreviated length that'll be returned + * by this function. If `min_len < 0`, then the function will set a sensible + * default minimum abbreviation length. + * + * Returns 0 on success, a negative error code otherwise. The computed length + * will be assigned to `*out`. + */ +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_len, + unsigned *out); + enum { /* * By default, `odb_write_object()` does not actually write anything diff --git a/odb/source-files.c b/odb/source-files.c index e90bb689bb..76797569de 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -122,6 +122,30 @@ out: return ret; } +static int odb_source_files_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned len = min_len; + int ret; + + ret = packfile_store_find_abbrev_len(files->packed, oid, len, &len); + if (ret < 0) + goto out; + + ret = odb_source_loose_find_abbrev_len(source, oid, len, &len); + if (ret < 0) + goto out; + + *out = len; + ret = 0; + +out: + return ret; +} + static int odb_source_files_freshen_object(struct odb_source *source, const struct object_id *oid) { @@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_stream = odb_source_files_read_object_stream; files->base.for_each_object = odb_source_files_for_each_object; files->base.count_objects = odb_source_files_count_objects; + files->base.find_abbrev_len = odb_source_files_find_abbrev_len; files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; diff --git a/odb/source.h b/odb/source.h index ee5d6ed530..a9d7d0b96f 100644 --- a/odb/source.h +++ b/odb/source.h @@ -157,6 +157,18 @@ struct odb_source { enum odb_count_objects_flags flags, unsigned long *out); + /* + * This callback is expected to find the minimum required length to + * make the given object ID unique. + * + * The callback is expected to return a negative error code in case it + * failed, 0 otherwise. + */ + int (*find_abbrev_len)(struct odb_source *source, + const struct object_id *oid, + unsigned min_length, + unsigned *out); + /* * This callback is expected to freshen the given object so that its * last access time is set to the current time. This is used to ensure @@ -360,6 +372,18 @@ static inline int odb_source_count_objects(struct odb_source *source, return source->count_objects(source, flags, out); } +/* + * Determine the minimum required length to make the given object ID unique in + * the given source. Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + return source->find_abbrev_len(source, oid, min_len, out); +} + /* * Freshen an object in the object database by updating its timestamp. * Returns 1 in case the object has been freshened, 0 in case the object does