odb: introduce generic odb_find_abbrev_len()

Introduce a new generic `odb_find_abbrev_len()` function as well as
source-specific callback functions. This makes the logic to compute the
required prefix length to make a given object unique fully pluggable.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Patrick Steinhardt
2026-03-20 08:07:40 +01:00
committed by Junio C Hamano
parent 6c2ede6e4a
commit 83869e15fa
5 changed files with 142 additions and 53 deletions

View File

@@ -15,10 +15,9 @@
#include "refs.h"
#include "remote.h"
#include "dir.h"
#include "odb.h"
#include "oid-array.h"
#include "packfile.h"
#include "pretty.h"
#include "object-file.h"
#include "read-cache-ll.h"
#include "repo-settings.h"
#include "repository.h"
@@ -569,19 +568,6 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix,
return ret;
}
/*
 * Compute the zero-based position of the highest bit that is set in "val".
 * Both 0 and 1 yield 0. A plain shift loop is used on purpose: fls() or
 * __builtin_clzl() would be faster, but this is not a hot path.
 */
static unsigned msb(unsigned long val)
{
	unsigned slot;

	/* Every halving that still leaves a value above 1 moves up one slot. */
	for (slot = 0; val > 1; val >>= 1)
		slot++;
	return slot;
}
void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo,
const struct object_id *oid, int abbrev_len)
{
@@ -602,49 +588,14 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
{
const struct git_hash_algo *algo =
oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
const unsigned hexsz = algo->hexsz;
unsigned len;
if (min_len < 0) {
unsigned long count;
if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
count = 0;
/*
* Add one because the MSB only tells us the highest bit set,
* not including the value of all the _other_ bits (so "15"
* is only one off of 2^4, but the MSB is the 3rd bit.
*/
len = msb(count) + 1;
/*
* We now know we have on the order of 2^len objects, which
* expects a collision at 2^(len/2). But we also care about hex
* chars, not bits, and there are 4 bits per hex. So all
* together we need to divide by 2 and round up.
*/
len = DIV_ROUND_UP(len, 2);
/*
* For very small repos, we stick with our regular fallback.
*/
if (len < FALLBACK_DEFAULT_ABBREV)
len = FALLBACK_DEFAULT_ABBREV;
} else {
len = min_len;
}
if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0)
len = algo->hexsz;
oid_to_hex_r(hex, oid);
if (len >= hexsz || !len)
return hexsz;
odb_prepare_alternates(r->objects);
for (struct odb_source *s = r->objects->sources; s; s = s->next) {
struct odb_source_files *files = odb_source_files_downcast(s);
packfile_store_find_abbrev_len(files->packed, oid, len, &len);
odb_source_loose_find_abbrev_len(s, oid, len, &len);
}
hex[len] = 0;
return len;
}

73
odb.c
View File

@@ -12,6 +12,7 @@
#include "midx.h"
#include "object-file-convert.h"
#include "object-file.h"
#include "object-name.h"
#include "odb.h"
#include "packfile.h"
#include "path.h"
@@ -964,6 +965,78 @@ out:
return ret;
}
/*
 * Report which bit slot (counting from zero) holds the most-significant set
 * bit of "val"; an input of 0 or 1 gives 0. Faster approaches exist (fls(),
 * __builtin_clzl()), but performance is not a concern for this helper.
 */
static unsigned msb(unsigned long val)
{
	unsigned highest = 0;

	/* Shift right until only the top bit (or nothing) remains. */
	while (val > 1) {
		val >>= 1;
		highest++;
	}
	return highest;
}
/*
 * Determine the abbreviation length for `oid` by asking every source of the
 * object database in turn.
 *
 * A non-negative `min_length` is used as the starting length. A negative
 * `min_length` derives the starting length from the approximate number of
 * objects in the database, but never goes below FALLBACK_DEFAULT_ABBREV.
 *
 * On success 0 is returned and the length is stored in `*out`. If a source
 * reports a non-zero result, that value is returned and `*out` is left
 * untouched.
 */
int odb_find_abbrev_len(struct object_database *odb,
			const struct object_id *oid,
			int min_length,
			unsigned *out)
{
	const struct git_hash_algo *algo;
	unsigned hexsz, len;

	/* Prefer the algorithm recorded in the object ID, if any. */
	if (oid->algo)
		algo = &hash_algos[oid->algo];
	else
		algo = odb->repo->hash_algo;
	hexsz = algo->hexsz;

	if (min_length >= 0) {
		len = min_length;
	} else {
		unsigned long count;
		unsigned bits;

		/* Treat a failed count like an empty database. */
		if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
			count = 0;

		/*
		 * msb() only reports the position of the highest set bit and
		 * ignores all lower bits (15 is just one short of 2^4, yet
		 * its MSB sits in slot 3), hence the "+ 1".
		 */
		bits = msb(count) + 1;

		/*
		 * With roughly 2^bits objects, a collision is expected at
		 * about 2^(bits/2). Each hex character carries 4 bits, so
		 * overall this boils down to halving and rounding up.
		 */
		len = DIV_ROUND_UP(bits, 2);

		/* Tiny repositories keep the regular fallback length. */
		if (len < FALLBACK_DEFAULT_ABBREV)
			len = FALLBACK_DEFAULT_ABBREV;
	}

	/* Zero or a full-length request: report the full hex size. */
	if (!len || len >= hexsz) {
		*out = hexsz;
		return 0;
	}

	odb_prepare_alternates(odb);
	for (struct odb_source *source = odb->sources; source; source = source->next) {
		/* Each source starts from the length found so far. */
		int ret = odb_source_find_abbrev_len(source, oid, len, &len);
		if (ret)
			return ret;
	}

	*out = len;
	return 0;
}
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{

16
odb.h
View File

@@ -545,6 +545,22 @@ int odb_count_objects(struct object_database *odb,
enum odb_count_objects_flags flags,
unsigned long *out);
/*
 * Given an object ID, find the minimum length required to make the
 * abbreviated object ID unique across the whole object database.
 *
 * The `min_len` determines the minimum abbreviated length that'll be returned
 * by this function. If `min_len < 0`, then the function will set a sensible
 * default minimum abbreviation length. If the minimum is zero or not shorter
 * than the full hexadecimal object ID, the full hexadecimal length is
 * reported.
 *
 * Returns 0 on success, a negative error code otherwise. The computed length
 * will be assigned to `*out` on success; `*out` is not written on failure.
 */
int odb_find_abbrev_len(struct object_database *odb,
const struct object_id *oid,
int min_len,
unsigned *out);
enum {
/*
* By default, `odb_write_object()` does not actually write anything

View File

@@ -122,6 +122,30 @@ out:
return ret;
}
/*
 * `find_abbrev_len` callback of the "files" source: starting from `min_len`,
 * consult the packfile store first and then the loose objects, feeding the
 * length produced by the former into the latter.
 *
 * Returns 0 and stores the final length in `*out` on success. A negative
 * result from either lookup is returned as-is, leaving `*out` unmodified.
 */
static int odb_source_files_find_abbrev_len(struct odb_source *source,
					    const struct object_id *oid,
					    unsigned min_len,
					    unsigned *out)
{
	struct odb_source_files *files = odb_source_files_downcast(source);
	unsigned needed = min_len;
	int err;

	err = packfile_store_find_abbrev_len(files->packed, oid, needed, &needed);
	if (err < 0)
		return err;

	err = odb_source_loose_find_abbrev_len(source, oid, needed, &needed);
	if (err < 0)
		return err;

	/* Only publish the result once both lookups went through. */
	*out = needed;
	return 0;
}
static int odb_source_files_freshen_object(struct odb_source *source,
const struct object_id *oid)
{
@@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
files->base.read_object_stream = odb_source_files_read_object_stream;
files->base.for_each_object = odb_source_files_for_each_object;
files->base.count_objects = odb_source_files_count_objects;
files->base.find_abbrev_len = odb_source_files_find_abbrev_len;
files->base.freshen_object = odb_source_files_freshen_object;
files->base.write_object = odb_source_files_write_object;
files->base.write_object_stream = odb_source_files_write_object_stream;

View File

@@ -157,6 +157,18 @@ struct odb_source {
enum odb_count_objects_flags flags,
unsigned long *out);
/*
 * This callback is expected to find the minimum required length to
 * make the given object ID unique.
 *
 * `min_length` is the abbreviation length to start from, and the
 * resulting length is expected to be written to `*out`.
 *
 * The callback is expected to return a negative error code in case it
 * failed, 0 otherwise.
 */
int (*find_abbrev_len)(struct odb_source *source,
const struct object_id *oid,
unsigned min_length,
unsigned *out);
/*
* This callback is expected to freshen the given object so that its
* last access time is set to the current time. This is used to ensure
@@ -360,6 +372,18 @@ static inline int odb_source_count_objects(struct odb_source *source,
return source->count_objects(source, flags, out);
}
/*
 * Ask the given source for the minimum length needed to make the object ID
 * unique within it, by dispatching to the source's `find_abbrev_len`
 * callback. Returns 0 on success, a negative error code otherwise.
 */
static inline int odb_source_find_abbrev_len(struct odb_source *source,
					     const struct object_id *oid,
					     unsigned min_len,
					     unsigned *out)
{
	int ret = source->find_abbrev_len(source, oid, min_len, out);

	return ret;
}
/*
* Freshen an object in the object database by updating its timestamp.
* Returns 1 in case the object has been freshened, 0 in case the object does