diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b6f12f41d6..cd13a3a89f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -848,6 +848,9 @@ static void batch_each_object(struct batch_options *opt, .callback = callback, .payload = _payload, }; + struct odb_for_each_object_options opts = { + .flags = flags, + }; struct bitmap_index *bitmap = NULL; struct odb_source *source; @@ -860,7 +863,7 @@ static void batch_each_object(struct batch_options *opt, odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) { int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, - &payload, flags); + &payload, &opts); if (ret) break; } @@ -884,7 +887,7 @@ static void batch_each_object(struct batch_options *opt, for (source = the_repository->objects->sources; source; source = source->next) { struct odb_source_files *files = odb_source_files_downcast(source); int ret = packfile_store_for_each_object(files->packed, &oi, - batch_one_object_oi, &payload, flags); + batch_one_object_oi, &payload, &opts); if (ret) break; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index da1087930c..88388254d9 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4359,6 +4359,12 @@ static void add_objects_in_unpacked_packs(void) { struct odb_source *source; time_t mtime; + struct odb_for_each_object_options opts = { + .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS, + }; struct object_info oi = { .mtimep = &mtime, }; @@ -4371,11 +4377,7 @@ static void add_objects_in_unpacked_packs(void) continue; if (packfile_store_for_each_object(files->packed, &oi, - add_object_in_unpacked_pack, NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + add_object_in_unpacked_pack, NULL, &opts)) die(_("cannot open pack index")); } } diff --git a/cbtree.c b/cbtree.c index cf8cf75b89..4ab794bddc 100644 --- a/cbtree.c +++ b/cbtree.c @@ -96,26 +96,28 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) return p && !memcmp(p->k, k, klen) ? p : NULL; } -static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg) +static int cb_descend(struct cb_node *p, cb_iter fn, void *arg) { if (1 & (uintptr_t)p) { struct cb_node *q = cb_node_of(p); - enum cb_next n = cb_descend(q->child[0], fn, arg); - - return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg); + int ret = cb_descend(q->child[0], fn, arg); + if (ret) + return ret; + return cb_descend(q->child[1], fn, arg); } else { return fn(p, arg); } } -void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, - cb_iter fn, void *arg) +int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, + cb_iter fn, void *arg) { struct cb_node *p = t->root; struct cb_node *top = p; size_t i = 0; - if (!p) return; /* empty tree */ + if (!p) + return 0; /* empty tree */ /* Walk tree, maintaining top pointer */ while (1 & (uintptr_t)p) { @@ -130,7 +132,8 @@ void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, for (i = 0; i < klen; i++) { if (p->k[i] != kpfx[i]) - return; /* "best" match failed */ + return 0; /* "best" match failed */ } - cb_descend(top, fn, arg); + + return cb_descend(top, fn, arg); } diff --git a/cbtree.h b/cbtree.h index 43193abdda..c374b1b3db 100644 --- a/cbtree.h +++ b/cbtree.h @@ -30,11 +30,6 @@ struct cb_tree { struct cb_node *root; }; -enum cb_next { - CB_CONTINUE = 0, - CB_BREAK = 1 -}; - #define CBTREE_INIT { 0 } static inline void cb_init(struct cb_tree *t) @@ -46,9 +41,15 @@ static inline void cb_init(struct cb_tree *t) struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen); struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen); -typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg); +/* + * Callback invoked by `cb_each()` for each node in the critbit tree. A return + * value of 0 will cause the iteration to continue, a non-zero return code will + * cause iteration to abort. The error code will be relayed back from + * `cb_each()` in that case. + */ +typedef int (*cb_iter)(struct cb_node *, void *arg); -void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, - cb_iter, void *arg); +int cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, + cb_iter, void *arg); #endif /* CBTREE_H */ diff --git a/commit-graph.c b/commit-graph.c index c030003330..df4b4a125e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1969,6 +1969,9 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { struct odb_source *source; enum object_type type; + struct odb_for_each_object_options opts = { + .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER, + }; struct object_info oi = { .typep = &type, }; @@ -1983,7 +1986,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) for (source = ctx->r->objects->sources; source; source = source->next) { struct odb_source_files *files = odb_source_files_downcast(source); packfile_store_for_each_object(files->packed, &oi, add_packed_commits_oi, - ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + ctx, &opts); } if (ctx->progress_done < ctx->approx_nr_objects) diff --git a/hash.c b/hash.c index 553f2008ea..e925b9754e 100644 --- a/hash.c +++ b/hash.c @@ -317,3 +317,21 @@ const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) /* Otherwise use the default one. */ return algop; } + +unsigned oid_common_prefix_hexlen(const struct object_id *a, + const struct object_id *b) +{ + unsigned rawsz = hash_algos[a->algo].rawsz; + + for (unsigned i = 0; i < rawsz; i++) { + if (a->hash[i] == b->hash[i]) + continue; + + if ((a->hash[i] ^ b->hash[i]) & 0xf0) + return i * 2; + else + return i * 2 + 1; + } + + return rawsz * 2; +} diff --git a/hash.h b/hash.h index d51efce1d3..c082a53c9a 100644 --- a/hash.h +++ b/hash.h @@ -396,6 +396,9 @@ static inline int oideq(const struct object_id *oid1, const struct object_id *oi return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ); } +unsigned oid_common_prefix_hexlen(const struct object_id *a, + const struct object_id *b); + static inline void oidcpy(struct object_id *dst, const struct object_id *src) { memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ); diff --git a/object-file.c b/object-file.c index f0b029ff0b..4f77ce0982 100644 --- a/object-file.c +++ b/object-file.c @@ -33,6 +33,9 @@ /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); + static int get_conv_flags(unsigned flags) { if (flags & INDEX_RENORMALIZE) @@ -1845,11 +1848,28 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } } +static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->request) { + struct object_info oi = *data->request; + + if (odb_source_loose_read_object_info(data->source, + oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct for_each_object_wrapper_data data = { .source = source, @@ -1859,11 +1879,16 @@ int odb_source_loose_for_each_object(struct odb_source *source, }; /* There are no loose promisor objects, so we can return immediately. */ - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) return 0; - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) return 0; + if (opts->prefix) + return oidtree_each(odb_source_loose_cache(source, opts->prefix), + opts->prefix, opts->prefix_hex_len, + for_each_prefixed_object_wrapper_cb, &data); + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, NULL, NULL, &data); } @@ -1914,9 +1939,10 @@ int odb_source_loose_count_objects(struct odb_source *source, *out = count * 256; ret = 0; } else { + struct odb_for_each_object_options opts = { 0 }; *out = 0; ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, 0); + out, &opts); } out: @@ -1926,6 +1952,44 @@ out: return ret; } +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) @@ -1934,8 +1998,8 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid) +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { struct odb_source_files *files = odb_source_files_downcast(source); int subdir_nr = oid->hash[0]; diff --git a/object-file.h b/object-file.h index f8d8805a18..3686f182e4 100644 --- a/object-file.h +++ b/object-file.h @@ -74,13 +74,6 @@ int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); -/* - * Populate and return the loose object cache array corresponding to the - * given object ID. - */ -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid); - /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. @@ -137,7 +130,7 @@ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* * Count the number of loose objects in this source. @@ -153,6 +146,18 @@ int odb_source_loose_count_objects(struct odb_source *source, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Find the shortest unique prefix for the given object ID, where `min_len` is + * the minimum length that the prefix should have. + * + * Returns 0 on success, in which case the computed length will be written to + * `out`. Otherwise, a negative error code is returned. + */ +int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object diff --git a/object-name.c b/object-name.c index 84a34b83ba..21dcdc4a0e 100644 --- a/object-name.c +++ b/object-name.c @@ -15,11 +15,9 @@ #include "refs.h" #include "remote.h" #include "dir.h" +#include "odb.h" #include "oid-array.h" -#include "oidtree.h" -#include "packfile.h" #include "pretty.h" -#include "object-file.h" #include "read-cache-ll.h" #include "repo-settings.h" #include "repository.h" @@ -49,30 +47,29 @@ struct disambiguate_state { unsigned candidate_ok:1; unsigned disambiguate_fn_used:1; unsigned ambiguous:1; - unsigned always_call_fn:1; }; -static void update_candidates(struct disambiguate_state *ds, const struct object_id *current) +static int update_disambiguate_state(const struct object_id *current, + struct object_info *oi UNUSED, + void *cb_data) { + struct disambiguate_state *ds = cb_data; + /* The hash algorithm of current has already been filtered */ - if (ds->always_call_fn) { - ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 1 : 0; - return; - } if (!ds->candidate_exists) { /* this is the first candidate */ oidcpy(&ds->candidate, current); ds->candidate_exists = 1; - return; + return 0; } else if (oideq(&ds->candidate, current)) { /* the same as what we already have seen */ - return; + return 0; } if (!ds->fn) { /* cannot disambiguate between ds->candidate and current */ ds->ambiguous = 1; - return; + return ds->ambiguous; } if (!ds->candidate_checked) { @@ -85,7 +82,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object /* discard the candidate; we know it does not satisfy fn */ oidcpy(&ds->candidate, current); ds->candidate_checked = 0; - return; + return 0; } /* if we reach this point, we know ds->candidate satisfies fn */ @@ -96,128 +93,12 @@ static void update_candidates(struct disambiguate_state *ds, const struct object */ ds->candidate_ok = 0; ds->ambiguous = 1; + return ds->ambiguous; } /* otherwise, current can be discarded and candidate is still good */ -} -static int match_hash(unsigned, const unsigned char *, const unsigned char *); - -static enum cb_next match_prefix(const struct object_id *oid, void *arg) -{ - struct disambiguate_state *ds = arg; - /* no need to call match_hash, oidtree_each did prefix match */ - update_candidates(ds, oid); - return ds->ambiguous ? CB_BREAK : CB_CONTINUE; -} - -static void find_short_object_filename(struct disambiguate_state *ds) -{ - struct odb_source *source; - - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), - &ds->bin_pfx, ds->len, match_prefix, ds); -} - -static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) -{ - do { - if (*a != *b) - return 0; - a++; - b++; - len -= 2; - } while (len > 1); - if (len) - if ((*a ^ *b) & 0xf0) - return 0; - return 1; -} - -static void unique_in_midx(struct multi_pack_index *m, - struct disambiguate_state *ds) -{ - for (; m; m = m->base_midx) { - uint32_t num, i, first = 0; - const struct object_id *current = NULL; - int len = ds->len > ds->repo->hash_algo->hexsz ? - ds->repo->hash_algo->hexsz : ds->len; - - if (!m->num_objects) - continue; - - num = m->num_objects + m->num_objects_in_base; - - bsearch_one_midx(&ds->bin_pfx, m, &first); - - /* - * At this point, "first" is the location of the lowest - * object with an object name that could match - * "bin_pfx". See if we have 0, 1 or more objects that - * actually match(es). - */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - current = nth_midxed_object_oid(&oid, m, i); - if (!match_hash(len, ds->bin_pfx.hash, current->hash)) - break; - update_candidates(ds, current); - } - } -} - -static void unique_in_pack(struct packed_git *p, - struct disambiguate_state *ds) -{ - uint32_t num, i, first = 0; - int len = ds->len > ds->repo->hash_algo->hexsz ? - ds->repo->hash_algo->hexsz : ds->len; - - if (p->multi_pack_index) - return; - - if (open_pack_index(p) || !p->num_objects) - return; - - num = p->num_objects; - bsearch_pack(&ds->bin_pfx, p, &first); - - /* - * At this point, "first" is the location of the lowest object - * with an object name that could match "bin_pfx". See if we have - * 0, 1 or more objects that actually match(es). - */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - nth_packed_object_id(&oid, p, i); - if (!match_hash(len, ds->bin_pfx.hash, oid.hash)) - break; - update_candidates(ds, &oid); - } -} - -static void find_short_packed_object(struct disambiguate_state *ds) -{ - struct odb_source *source; - struct packed_git *p; - - /* Skip, unless oids from the storage hash algorithm are wanted */ - if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo)) - return; - - odb_prepare_alternates(ds->repo->objects); - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - unique_in_midx(m, ds); - } - - repo_for_each_pack(ds->repo, p) { - if (ds->ambiguous) - break; - unique_in_pack(p, ds); - } + return 0; } static int finish_object_disambiguation(struct disambiguate_state *ds, @@ -348,41 +229,57 @@ int set_disambiguate_hint_config(const char *var, const char *value) return error("unknown hint type for '%s': %s", var, value); } +static int parse_oid_prefix(const char *name, int len, + const struct git_hash_algo *algo, + char *hex_out, + struct object_id *oid_out) +{ + for (int i = 0; i < len; i++) { + unsigned char c = name[i]; + unsigned char val; + if (c >= '0' && c <= '9') { + val = c - '0'; + } else if (c >= 'a' && c <= 'f') { + val = c - 'a' + 10; + } else if (c >= 'A' && c <='F') { + val = c - 'A' + 10; + c -= 'A' - 'a'; + } else { + return -1; + } + + if (hex_out) + hex_out[i] = c; + if (oid_out) { + if (!(i & 1)) + val <<= 4; + oid_out->hash[i >> 1] |= val; + } + } + + if (hex_out) + hex_out[len] = '\0'; + if (oid_out) + oid_out->algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN; + + return 0; +} + static int init_object_disambiguation(struct repository *r, const char *name, int len, const struct git_hash_algo *algo, struct disambiguate_state *ds) { - int i; - if (len < MINIMUM_ABBREV || len > GIT_MAX_HEXSZ) return -1; memset(ds, 0, sizeof(*ds)); - for (i = 0; i < len ;i++) { - unsigned char c = name[i]; - unsigned char val; - if (c >= '0' && c <= '9') - val = c - '0'; - else if (c >= 'a' && c <= 'f') - val = c - 'a' + 10; - else if (c >= 'A' && c <='F') { - val = c - 'A' + 10; - c -= 'A' - 'a'; - } - else - return -1; - ds->hex_pfx[i] = c; - if (!(i & 1)) - val <<= 4; - ds->bin_pfx.hash[i >> 1] |= val; - } + if (parse_oid_prefix(name, len, algo, ds->hex_pfx, &ds->bin_pfx) < 0) + return -1; ds->len = len; - ds->hex_pfx[len] = '\0'; ds->repo = r; - ds->bin_pfx.algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN; odb_prepare_alternates(r->objects); return 0; } @@ -510,8 +407,8 @@ static int collect_ambiguous(const struct object_id *oid, void *data) return 0; } -static int repo_collect_ambiguous(struct repository *r UNUSED, - const struct object_id *oid, +static int repo_collect_ambiguous(const struct object_id *oid, + struct object_info *oi UNUSED, void *data) { return collect_ambiguous(oid, data); @@ -561,6 +458,7 @@ static enum get_oid_result get_short_oid(struct repository *r, struct object_id *oid, unsigned flags) { + struct odb_for_each_object_options opts = { 0 }; int status; struct disambiguate_state ds; int quietly = !!(flags & GET_OID_QUIETLY); @@ -588,8 +486,11 @@ static enum get_oid_result get_short_oid(struct repository *r, else ds.fn = default_disambiguate_hint; - find_short_object_filename(&ds); - find_short_packed_object(&ds); + opts.prefix = &ds.bin_pfx; + opts.prefix_hex_len = ds.len; + + odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state, + &ds, &opts); status = finish_object_disambiguation(&ds, oid); /* @@ -599,8 +500,8 @@ static enum get_oid_result get_short_oid(struct repository *r, */ if (status == MISSING_OBJECT) { odb_reprepare(r->objects); - find_short_object_filename(&ds); - find_short_packed_object(&ds); + odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state, + &ds, &opts); status = finish_object_disambiguation(&ds, oid); } @@ -648,169 +549,25 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix, const struct git_hash_algo *algo, each_abbrev_fn fn, void *cb_data) { + struct object_id prefix_oid = { 0 }; + struct odb_for_each_object_options opts = { + .prefix = &prefix_oid, + .prefix_hex_len = strlen(prefix), + }; struct oid_array collect = OID_ARRAY_INIT; - struct disambiguate_state ds; int ret; - if (init_object_disambiguation(r, prefix, strlen(prefix), algo, &ds) < 0) + if (parse_oid_prefix(prefix, opts.prefix_hex_len, algo, NULL, &prefix_oid) < 0) return -1; - ds.always_call_fn = 1; - ds.fn = repo_collect_ambiguous; - ds.cb_data = &collect; - find_short_object_filename(&ds); - find_short_packed_object(&ds); + if (odb_for_each_object_ext(r->objects, NULL, repo_collect_ambiguous, &collect, &opts) < 0) + return -1; ret = oid_array_for_each_unique(&collect, fn, cb_data); oid_array_clear(&collect); return ret; } -/* - * Return the slot of the most-significant bit set in "val". There are various - * ways to do this quickly with fls() or __builtin_clzl(), but speed is - * probably not a big deal here. - */ -static unsigned msb(unsigned long val) -{ - unsigned r = 0; - while (val >>= 1) - r++; - return r; -} - -struct min_abbrev_data { - unsigned int init_len; - unsigned int cur_len; - char *hex; - struct repository *repo; - const struct object_id *oid; -}; - -static inline char get_hex_char_from_oid(const struct object_id *oid, - unsigned int pos) -{ - static const char hex[] = "0123456789abcdef"; - - if ((pos & 1) == 0) - return hex[oid->hash[pos >> 1] >> 4]; - else - return hex[oid->hash[pos >> 1] & 0xf]; -} - -static int extend_abbrev_len(const struct object_id *oid, - struct min_abbrev_data *mad) -{ - unsigned int i = mad->init_len; - while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i)) - i++; - - if (mad->hex[i] && i >= mad->cur_len) - mad->cur_len = i + 1; - - return 0; -} - -static int repo_extend_abbrev_len(struct repository *r UNUSED, - const struct object_id *oid, - void *cb_data) -{ - return extend_abbrev_len(oid, cb_data); -} - -static void find_abbrev_len_for_midx(struct multi_pack_index *m, - struct min_abbrev_data *mad) -{ - for (; m; m = m->base_midx) { - int match = 0; - uint32_t num, first = 0; - struct object_id oid; - const struct object_id *mad_oid; - - if (!m->num_objects) - continue; - - num = m->num_objects + m->num_objects_in_base; - mad_oid = mad->oid; - match = bsearch_one_midx(mad_oid, m, &first); - - /* - * first is now the position in the packfile where we - * would insert mad->hash if it does not exist (or the - * position of mad->hash if it does exist). Hence, we - * consider a maximum of two objects nearby for the - * abbreviation length. - */ - mad->init_len = 0; - if (!match) { - if (nth_midxed_object_oid(&oid, m, first)) - extend_abbrev_len(&oid, mad); - } else if (first < num - 1) { - if (nth_midxed_object_oid(&oid, m, first + 1)) - extend_abbrev_len(&oid, mad); - } - if (first > 0) { - if (nth_midxed_object_oid(&oid, m, first - 1)) - extend_abbrev_len(&oid, mad); - } - mad->init_len = mad->cur_len; - } -} - -static void find_abbrev_len_for_pack(struct packed_git *p, - struct min_abbrev_data *mad) -{ - int match = 0; - uint32_t num, first = 0; - struct object_id oid; - const struct object_id *mad_oid; - - if (p->multi_pack_index) - return; - - if (open_pack_index(p) || !p->num_objects) - return; - - num = p->num_objects; - mad_oid = mad->oid; - match = bsearch_pack(mad_oid, p, &first); - - /* - * first is now the position in the packfile where we would insert - * mad->hash if it does not exist (or the position of mad->hash if - * it does exist). Hence, we consider a maximum of two objects - * nearby for the abbreviation length. - */ - mad->init_len = 0; - if (!match) { - if (!nth_packed_object_id(&oid, p, first)) - extend_abbrev_len(&oid, mad); - } else if (first < num - 1) { - if (!nth_packed_object_id(&oid, p, first + 1)) - extend_abbrev_len(&oid, mad); - } - if (first > 0) { - if (!nth_packed_object_id(&oid, p, first - 1)) - extend_abbrev_len(&oid, mad); - } - mad->init_len = mad->cur_len; -} - -static void find_abbrev_len_packed(struct min_abbrev_data *mad) -{ - struct packed_git *p; - - odb_prepare_alternates(mad->repo->objects); - for (struct odb_source *source = mad->repo->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - find_abbrev_len_for_midx(m, mad); - } - - repo_for_each_pack(mad->repo, p) - find_abbrev_len_for_pack(p, mad); -} - void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, const struct object_id *oid, int abbrev_len) { @@ -827,65 +584,19 @@ void strbuf_add_unique_abbrev(struct strbuf *sb, const struct object_id *oid, } int repo_find_unique_abbrev_r(struct repository *r, char *hex, - const struct object_id *oid, int len) + const struct object_id *oid, int min_len) { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - struct disambiguate_state ds; - struct min_abbrev_data mad; - struct object_id oid_ret; - const unsigned hexsz = algo->hexsz; + unsigned len; - if (len < 0) { - unsigned long count; - - if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) - count = 0; - - /* - * Add one because the MSB only tells us the highest bit set, - * not including the value of all the _other_ bits (so "15" - * is only one off of 2^4, but the MSB is the 3rd bit. - */ - len = msb(count) + 1; - /* - * We now know we have on the order of 2^len objects, which - * expects a collision at 2^(len/2). But we also care about hex - * chars, not bits, and there are 4 bits per hex. So all - * together we need to divide by 2 and round up. - */ - len = DIV_ROUND_UP(len, 2); - /* - * For very small repos, we stick with our regular fallback. - */ - if (len < FALLBACK_DEFAULT_ABBREV) - len = FALLBACK_DEFAULT_ABBREV; - } + if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0) + len = algo->hexsz; oid_to_hex_r(hex, oid); - if (len >= hexsz || !len) - return hexsz; + hex[len] = 0; - mad.repo = r; - mad.init_len = len; - mad.cur_len = len; - mad.hex = hex; - mad.oid = oid; - - find_abbrev_len_packed(&mad); - - if (init_object_disambiguation(r, hex, mad.cur_len, algo, &ds) < 0) - return -1; - - ds.fn = repo_extend_abbrev_len; - ds.always_call_fn = 1; - ds.cb_data = (void *)&mad; - - find_short_object_filename(&ds); - (void)finish_object_disambiguation(&ds, &oid_ret); - - hex[mad.cur_len] = 0; - return mad.cur_len; + return len; } const char *repo_find_unique_abbrev(struct repository *r, diff --git a/odb.c b/odb.c index 350e23f3c0..3f94a53df1 100644 --- a/odb.c +++ b/odb.c @@ -12,6 +12,7 @@ #include "midx.h" #include "object-file-convert.h" #include "object-file.h" +#include "object-name.h" #include "odb.h" #include "packfile.h" #include "path.h" @@ -896,25 +897,37 @@ int odb_freshen_object(struct object_database *odb, return 0; } +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + if (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) + continue; + + ret = odb_source_for_each_object(source, request, cb, cb_data, opts); + if (ret) + return ret; + } + + return 0; +} + int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, unsigned flags) { - int ret; - - odb_prepare_alternates(odb); - for (struct odb_source *source = odb->sources; source; source = source->next) { - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) - continue; - - ret = odb_source_for_each_object(source, request, cb, cb_data, flags); - if (ret) - return ret; - } - - return 0; + struct odb_for_each_object_options opts = { + .flags = flags, + }; + return odb_for_each_object_ext(odb, request, cb, cb_data, &opts); } int odb_count_objects(struct object_database *odb, @@ -952,6 +965,78 @@ out: return ret; } +/* + * Return the slot of the most-significant bit set in "val". There are various + * ways to do this quickly with fls() or __builtin_clzl(), but speed is + * probably not a big deal here. + */ +static unsigned msb(unsigned long val) +{ + unsigned r = 0; + while (val >>= 1) + r++; + return r; +} + +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_length, + unsigned *out) +{ + const struct git_hash_algo *algo = + oid->algo ? &hash_algos[oid->algo] : odb->repo->hash_algo; + const unsigned hexsz = algo->hexsz; + unsigned len; + int ret; + + if (min_length < 0) { + unsigned long count; + + if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) + count = 0; + + /* + * Add one because the MSB only tells us the highest bit set, + * not including the value of all the _other_ bits (so "15" + * is only one off of 2^4, but the MSB is the 3rd bit. + */ + len = msb(count) + 1; + /* + * We now know we have on the order of 2^len objects, which + * expects a collision at 2^(len/2). But we also care about hex + * chars, not bits, and there are 4 bits per hex. So all + * together we need to divide by 2 and round up. + */ + len = DIV_ROUND_UP(len, 2); + /* + * For very small repos, we stick with our regular fallback. + */ + if (len < FALLBACK_DEFAULT_ABBREV) + len = FALLBACK_DEFAULT_ABBREV; + } else { + len = min_length; + } + + if (len >= hexsz || !len) { + *out = hexsz; + ret = 0; + goto out; + } + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + ret = odb_source_find_abbrev_len(source, oid, len, &len); + if (ret) + goto out; + } + + ret = 0; + *out = len; + +out: + return ret; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index 9aee260105..984bafca9d 100644 --- a/odb.h +++ b/odb.h @@ -481,6 +481,22 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct object_info *oi, void *cb_data); +/* + * Options that can be passed to `odb_for_each_object()` and its + * backend-specific implementations. + */ +struct odb_for_each_object_options { + /* A bitfield of `odb_for_each_object_flags`. */ + enum odb_for_each_object_flags flags; + + /* + * If set, only iterate through objects whose first `prefix_hex_len` + * hex characters matches the given prefix. + */ + const struct object_id *prefix; + size_t prefix_hex_len; +}; + /* * Iterate through all objects contained in the object database. Note that * objects may be iterated over multiple times in case they are either stored @@ -495,6 +511,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, * Returns 0 on success, a negative error code in case a failure occurred, or * an arbitrary non-zero error code returned by the callback itself. */ +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts); + +/* Same as `odb_for_each_object_ext()` with `opts.flags` set to the given flags. */ int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, @@ -522,6 +545,22 @@ int odb_count_objects(struct object_database *odb, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Given an object ID, find the minimum required length required to make the + * object ID unique across the whole object database. + * + * The `min_len` determines the minimum abbreviated length that'll be returned + * by this function. If `min_len < 0`, then the function will set a sensible + * default minimum abbreviation length. + * + * Returns 0 on success, a negative error code otherwise. The computed length + * will be assigned to `*out`. + */ +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_len, + unsigned *out); + enum { /* * By default, `odb_write_object()` does not actually write anything diff --git a/odb/source-files.c b/odb/source-files.c index c08d8993e3..76797569de 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -75,18 +75,18 @@ static int odb_source_files_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct odb_source_files *files = odb_source_files_downcast(source); int ret; - if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { - ret = odb_source_loose_for_each_object(source, request, cb, cb_data, flags); + if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts); if (ret) return ret; } - ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, flags); + ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, opts); if (ret) return ret; @@ -122,6 +122,30 @@ out: return ret; } +static int odb_source_files_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned len = min_len; + int ret; + + ret = packfile_store_find_abbrev_len(files->packed, oid, len, &len); + if (ret < 0) + goto out; + + ret = odb_source_loose_find_abbrev_len(source, oid, len, &len); + if (ret < 0) + goto out; + + *out = len; + ret = 0; + +out: + return ret; +} + static int odb_source_files_freshen_object(struct odb_source *source, const struct object_id *oid) { @@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_stream = odb_source_files_read_object_stream; files->base.for_each_object = odb_source_files_for_each_object; files->base.count_objects = odb_source_files_count_objects; + files->base.find_abbrev_len = odb_source_files_find_abbrev_len; files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; diff --git a/odb/source.h b/odb/source.h index 96c906e7a1..a9d7d0b96f 100644 --- a/odb/source.h +++ b/odb/source.h @@ -140,7 +140,7 @@ struct odb_source { const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* * This callback is expected to count objects in the given object @@ -157,6 +157,18 @@ struct odb_source { enum odb_count_objects_flags flags, unsigned long *out); + /* + * This callback is expected to find the minimum required length to + * make the given object ID unique. + * + * The callback is expected to return a negative error code in case it + * failed, 0 otherwise. + */ + int (*find_abbrev_len)(struct odb_source *source, + const struct object_id *oid, + unsigned min_length, + unsigned *out); + /* * This callback is expected to freshen the given object so that its * last access time is set to the current time. This is used to ensure @@ -343,9 +355,9 @@ static inline int odb_source_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { - return source->for_each_object(source, request, cb, cb_data, flags); + return source->for_each_object(source, request, cb, cb_data, opts); } /* @@ -360,6 +372,18 @@ static inline int odb_source_count_objects(struct odb_source *source, return source->count_objects(source, flags, out); } +/* + * Determine the minimum required length to make the given object ID unique in + * the given source. Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + return source->find_abbrev_len(source, oid, min_len, out); +} + /* * Freshen an object in the object database by updating its timestamp. * Returns 1 in case the object has been freshened, 0 in case the object does diff --git a/oidtree.c b/oidtree.c index 324de94934..ab9fe7ec7a 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,14 +6,6 @@ #include "oidtree.h" #include "hash.h" -struct oidtree_iter_data { - oidtree_iter fn; - void *arg; - size_t *last_nibble_at; - uint32_t algo; - uint8_t last_byte; -}; - void oidtree_init(struct oidtree *ot) { cb_init(&ot->tree); @@ -54,8 +46,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) cb_insert(&ot->tree, on, sizeof(*oid)); } - -int oidtree_contains(struct oidtree *ot, const struct object_id *oid) +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) { struct object_id k; size_t klen = sizeof(k); @@ -69,41 +60,51 @@ int oidtree_contains(struct oidtree *ot, const struct object_id *oid) klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < offsetof(struct object_id, algo)); - return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0; + return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen); } -static enum cb_next iter(struct cb_node *n, void *arg) +struct oidtree_each_data { + oidtree_each_cb cb; + void *cb_data; + size_t *last_nibble_at; + uint32_t algo; + uint8_t last_byte; +}; + +static int iter(struct cb_node *n, void *cb_data) { - struct oidtree_iter_data *x = arg; + struct oidtree_each_data *data = cb_data; struct object_id k; /* Copy to provide 4-byte alignment needed by struct object_id. */ memcpy(&k, n->k, sizeof(k)); - if (x->algo != GIT_HASH_UNKNOWN && x->algo != k.algo) - return CB_CONTINUE; + if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) + return 0; - if (x->last_nibble_at) { - if ((k.hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0) - return CB_CONTINUE; + if (data->last_nibble_at) { + if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) + return 0; } - return x->fn(&k, x->arg); + return data->cb(&k, data->cb_data); } -void oidtree_each(struct oidtree *ot, const struct object_id *oid, - size_t oidhexsz, oidtree_iter fn, void *arg) +int oidtree_each(struct oidtree *ot, const struct object_id *prefix, + size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) { - size_t klen = oidhexsz / 2; - struct oidtree_iter_data x = { 0 }; - assert(oidhexsz <= GIT_MAX_HEXSZ); + struct oidtree_each_data data = { + .cb = cb, + .cb_data = cb_data, + .algo = prefix->algo, + }; + size_t klen = prefix_hex_len / 2; + assert(prefix_hex_len <= GIT_MAX_HEXSZ); - x.fn = fn; - x.arg = arg; - x.algo = oid->algo; - if (oidhexsz & 1) { - x.last_byte = oid->hash[klen]; - x.last_nibble_at = &klen; + if (prefix_hex_len & 1) { + data.last_byte = prefix->hash[klen]; + data.last_nibble_at = &klen; } - cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x); + + return cb_each(&ot->tree, prefix->hash, klen, iter, &data); } diff --git a/oidtree.h b/oidtree.h index 77898f510a..2b7bad2e60 100644 --- a/oidtree.h +++ b/oidtree.h @@ -5,18 +5,52 @@ #include "hash.h" #include "mem-pool.h" +/* + * OID trees are an efficient storage for object IDs that use a critbit tree + * internally. Common prefixes are duplicated and object IDs are stored in a + * way that allow easy iteration over the objects in lexicographic order. As a + * consequence, operations that want to enumerate all object IDs that match a + * given prefix can be answered efficiently. + * + * Note that it is not (yet) possible to store data other than the object IDs + * themselves in this tree. + */ struct oidtree { struct cb_tree tree; struct mem_pool mem_pool; }; -void oidtree_init(struct oidtree *); -void oidtree_clear(struct oidtree *); -void oidtree_insert(struct oidtree *, const struct object_id *); -int oidtree_contains(struct oidtree *, const struct object_id *); +/* Initialize the oidtree so that it is ready for use. */ +void oidtree_init(struct oidtree *ot); -typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data); -void oidtree_each(struct oidtree *, const struct object_id *, - size_t oidhexsz, oidtree_iter, void *data); +/* + * Release all memory associated with the oidtree and reinitialize it for + * subsequent use. + */ +void oidtree_clear(struct oidtree *ot); + +/* Insert the object ID into the tree. */ +void oidtree_insert(struct oidtree *ot, const struct object_id *oid); + +/* Check whether the tree contains the given object ID. */ +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); + +/* + * Callback function used for `oidtree_each()`. Returning a non-zero exit code + * will cause iteration to stop. The exit code will be propagated to the caller + * of `oidtree_each()`. + */ +typedef int (*oidtree_each_cb)(const struct object_id *oid, + void *cb_data); + +/* + * Iterate through all object IDs in the tree whose prefix matches the given + * object ID prefix and invoke the callback function on each of them. + * + * Returns any non-zero exit code from the provided callback function. + */ +int oidtree_each(struct oidtree *ot, + const struct object_id *prefix, size_t prefix_hex_len, + oidtree_each_cb cb, void *cb_data); #endif /* OIDTREE_H */ diff --git a/packfile.c b/packfile.c index d4de9f3ffe..ee9c7ea1d1 100644 --- a/packfile.c +++ b/packfile.c @@ -2371,11 +2371,182 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid, } } +static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) +{ + do { + if (*a != *b) + return 0; + a++; + b++; + len -= 2; + } while (len > 1); + if (len) + if ((*a ^ *b) & 0xf0) + return 0; + return 1; +} + +static int for_each_prefixed_object_in_midx( + struct packfile_store *store, + struct multi_pack_index *m, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + int ret; + + for (; m; m = m->base_midx) { + uint32_t num, i, first = 0; + int len = opts->prefix_hex_len > m->source->odb->repo->hash_algo->hexsz ? + m->source->odb->repo->hash_algo->hexsz : opts->prefix_hex_len; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + + bsearch_one_midx(opts->prefix, m, &first); + + /* + * At this point, "first" is the location of the lowest + * object with an object name that could match "opts->prefix". + * See if we have 0, 1 or more objects that actually match(es). + */ + for (i = first; i < num; i++) { + const struct object_id *current = NULL; + struct object_id oid; + + current = nth_midxed_object_oid(&oid, m, i); + + if (!match_hash(len, opts->prefix->hash, current->hash)) + break; + + if (data->request) { + struct object_info oi = *data->request; + + ret = packfile_store_read_object_info(store, current, + &oi, 0); + if (ret) + goto out; + + ret = data->cb(&oid, &oi, data->cb_data); + if (ret) + goto out; + } else { + ret = data->cb(&oid, NULL, data->cb_data); + if (ret) + goto out; + } + } + } + + ret = 0; + +out: + return ret; +} + +static int for_each_prefixed_object_in_pack( + struct packfile_store *store, + struct packed_git *p, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + uint32_t num, i, first = 0; + int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ? + p->repo->hash_algo->hexsz : opts->prefix_hex_len; + int ret; + + num = p->num_objects; + bsearch_pack(opts->prefix, p, &first); + + /* + * At this point, "first" is the location of the lowest object + * with an object name that could match "bin_pfx". See if we have + * 0, 1 or more objects that actually match(es). + */ + for (i = first; i < num; i++) { + struct object_id oid; + + nth_packed_object_id(&oid, p, i); + if (!match_hash(len, opts->prefix->hash, oid.hash)) + break; + + if (data->request) { + struct object_info oi = *data->request; + + ret = packfile_store_read_object_info(store, &oid, &oi, 0); + if (ret) + goto out; + + ret = data->cb(&oid, &oi, data->cb_data); + if (ret) + goto out; + } else { + ret = data->cb(&oid, NULL, data->cb_data); + if (ret) + goto out; + } + } + + ret = 0; + +out: + return ret; +} + +static int packfile_store_for_each_prefixed_object( + struct packfile_store *store, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + bool pack_errors = false; + int ret; + + if (opts->flags) + BUG("flags unsupported"); + + store->skip_mru_updates = true; + + m = get_multi_pack_index(store->source); + if (m) { + ret = for_each_prefixed_object_in_midx(store, m, opts, data); + if (ret) + goto out; + } + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + + if (open_pack_index(e->pack)) { + pack_errors = true; + continue; + } + + if (!e->pack->num_objects) + continue; + + ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data); + if (ret) + goto out; + } + + ret = 0; + +out: + store->skip_mru_updates = false; + if (!ret && pack_errors) + ret = -1; + return ret; +} + int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct packfile_store_for_each_object_wrapper_data data = { .store = store, @@ -2386,20 +2557,23 @@ int packfile_store_for_each_object(struct packfile_store *store, struct packfile_list_entry *e; int pack_errors = 0, ret; + if (opts->prefix) + return packfile_store_for_each_prefixed_object(store, opts, &data); + store->skip_mru_updates = true; for (e = packfile_store_get_packs(store); e; e = e->next) { struct packed_git *p = e->pack; - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { @@ -2408,7 +2582,7 @@ int packfile_store_for_each_object(struct packfile_store *store, } ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper, - &data, flags); + &data, opts->flags); if (ret) goto out; } @@ -2423,6 +2597,117 @@ out: return ret; } +static int extend_abbrev_len(const struct object_id *a, + const struct object_id *b, + unsigned *out) +{ + unsigned len = oid_common_prefix_hexlen(a, b); + if (len != hash_algos[a->algo].hexsz && len >= *out) + *out = len + 1; + return 0; +} + +static void find_abbrev_len_for_midx(struct multi_pack_index *m, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + unsigned len = min_len; + + for (; m; m = m->base_midx) { + int match = 0; + uint32_t num, first = 0; + struct object_id found_oid; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + match = bsearch_one_midx(oid, m, &first); + + /* + * first is now the position in the packfile where we + * would insert the object ID if it does not exist (or the + * position of the object ID if it does exist). Hence, we + * consider a maximum of two objects nearby for the + * abbreviation length. + */ + + if (!match) { + if (nth_midxed_object_oid(&found_oid, m, first)) + extend_abbrev_len(&found_oid, oid, &len); + } else if (first < num - 1) { + if (nth_midxed_object_oid(&found_oid, m, first + 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + if (first > 0) { + if (nth_midxed_object_oid(&found_oid, m, first - 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + } + + *out = len; +} + +static void find_abbrev_len_for_pack(struct packed_git *p, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + int match; + uint32_t num, first = 0; + struct object_id found_oid; + unsigned len = min_len; + + num = p->num_objects; + match = bsearch_pack(oid, p, &first); + + /* + * first is now the position in the packfile where we would insert + * the object ID if it does not exist (or the position of mad->hash if + * it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length. + */ + if (!match) { + if (!nth_packed_object_id(&found_oid, p, first)) + extend_abbrev_len(&found_oid, oid, &len); + } else if (first < num - 1) { + if (!nth_packed_object_id(&found_oid, p, first + 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + if (first > 0) { + if (!nth_packed_object_id(&found_oid, p, first - 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + + *out = len; +} + +int packfile_store_find_abbrev_len(struct packfile_store *store, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + + m = get_multi_pack_index(store->source); + if (m) + find_abbrev_len_for_midx(m, oid, min_len, &min_len); + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + if (open_pack_index(e->pack) || !e->pack->num_objects) + continue; + + find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len); + } + + *out = min_len; + return 0; +} + struct add_promisor_object_data { struct repository *repo; struct oidset *set; diff --git a/packfile.h b/packfile.h index a16ec3950d..45b35973f0 100644 --- a/packfile.h +++ b/packfile.h @@ -367,7 +367,12 @@ int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); + +int packfile_store_find_abbrev_len(struct packfile_store *store, + const struct object_id *oid, + unsigned min_len, + unsigned *out); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index e6eede2740..d4d05c7dc3 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -24,7 +24,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n) return 0; } -static void check_contains(struct oidtree *ot, const char *hex, int expected) +static void check_contains(struct oidtree *ot, const char *hex, bool expected) { struct object_id oid; @@ -38,7 +38,7 @@ struct expected_hex_iter { const char *query; }; -static enum cb_next check_each_cb(const struct object_id *oid, void *data) +static int check_each_cb(const struct object_id *oid, void *data) { struct expected_hex_iter *hex_iter = data; struct object_id expected; @@ -49,7 +49,7 @@ static enum cb_next check_each_cb(const struct object_id *oid, void *data) &expected); cl_assert_equal_s(oid_to_hex(oid), oid_to_hex(&expected)); hex_iter->i += 1; - return CB_CONTINUE; + return 0; } LAST_ARG_MUST_BE_NULL @@ -88,12 +88,12 @@ void test_oidtree__cleanup(void) void test_oidtree__contains(void) { FILL_TREE(&ot, "444", "1", "2", "3", "4", "5", "a", "b", "c", "d", "e"); - check_contains(&ot, "44", 0); - check_contains(&ot, "441", 0); - check_contains(&ot, "440", 0); - check_contains(&ot, "444", 1); - check_contains(&ot, "4440", 1); - check_contains(&ot, "4444", 0); + check_contains(&ot, "44", false); + check_contains(&ot, "441", false); + check_contains(&ot, "440", false); + check_contains(&ot, "444", true); + check_contains(&ot, "4440", true); + check_contains(&ot, "4444", false); } void test_oidtree__each(void)