From 822d403651aa6baff064095f98d8d8349d876eb8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:31 +0200 Subject: [PATCH 01/17] odb: introduce "in-memory" source Next to our typical object database sources, each object database also has an implicit source of "cached" objects. These cached objects only exist in memory and serve some use cases: - They contain evergreen objects that we expect to always exist, like for example the empty tree. - They can be used to store temporary objects that we don't want to persist to disk, which is used by git-blame(1) to create a fake worktree commit. Overall, their use is somewhat restricted though. For example, we don't provide the ability to use it as a temporary object database source that allows the user to write objects, but discard them after Git exits. So while these cached objects behave almost like a source, they aren't used as one. This is about to change over the following commits, where we will turn cached objects into a new "in-memory" source. This will allow us to use it exactly the same as any other source by providing the same common interface as the "files" source. For now, the in-memory source only hosts the cached objects and doesn't provide any logic yet. This will change with subsequent commits, where we move respective functionality into the source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + meson.build | 1 + odb.c | 21 +++++++++++++-------- odb.h | 4 ++-- odb/source-inmemory.c | 12 ++++++++++++ odb/source-inmemory.h | 35 +++++++++++++++++++++++++++++++++++ odb/source.h | 3 +++ 7 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 odb/source-inmemory.c create mode 100644 odb/source-inmemory.h diff --git a/Makefile b/Makefile index 22a8993482..3cda12c455 100644 --- a/Makefile +++ b/Makefile @@ -1218,6 +1218,7 @@ LIB_OBJS += object.o LIB_OBJS += odb.o LIB_OBJS += odb/source.o LIB_OBJS += odb/source-files.o +LIB_OBJS += odb/source-inmemory.o LIB_OBJS += odb/streaming.o LIB_OBJS += odb/transaction.o LIB_OBJS += oid-array.o diff --git a/meson.build b/meson.build index 6dc23b3af2..ffa73ce7ce 100644 --- a/meson.build +++ b/meson.build @@ -404,6 +404,7 @@ libgit_sources = [ 'odb.c', 'odb/source.c', 'odb/source-files.c', + 'odb/source-inmemory.c', 'odb/streaming.c', 'odb/transaction.c', 'oid-array.c', diff --git a/odb.c b/odb.c index 40a5e9c4e0..60e1eead25 100644 --- a/odb.c +++ b/odb.c @@ -14,6 +14,7 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/source-inmemory.h" #include "packfile.h" #include "path.h" #include "promisor-remote.h" @@ -53,9 +54,9 @@ static const struct cached_object *find_cached_object(struct object_database *ob .type = OBJ_TREE, .buf = "", }; - const struct cached_object_entry *co = object_store->cached_objects; + const struct cached_object_entry *co = object_store->inmemory_objects->objects; - for (size_t i = 0; i < object_store->cached_object_nr; i++, co++) + for (size_t i = 0; i < object_store->inmemory_objects->objects_nr; i++, co++) if (oideq(&co->oid, oid)) return &co->value; @@ -792,9 +793,10 @@ int odb_pretend_object(struct object_database *odb, find_cached_object(odb, oid)) return 0; - ALLOC_GROW(odb->cached_objects, - odb->cached_object_nr + 1, odb->cached_object_alloc); - co = 
&odb->cached_objects[odb->cached_object_nr++]; + ALLOC_GROW(odb->inmemory_objects->objects, + odb->inmemory_objects->objects_nr + 1, + odb->inmemory_objects->objects_alloc); + co = &odb->inmemory_objects->objects[odb->inmemory_objects->objects_nr++]; co->value.size = len; co->value.type = type; co_buf = xmalloc(len); @@ -1083,6 +1085,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); + o->inmemory_objects = odb_source_inmemory_new(o); free(to_free); @@ -1123,9 +1126,11 @@ void odb_free(struct object_database *o) odb_close(o); odb_free_sources(o); - for (size_t i = 0; i < o->cached_object_nr; i++) - free((char *) o->cached_objects[i].value.buf); - free(o->cached_objects); + for (size_t i = 0; i < o->inmemory_objects->objects_nr; i++) + free((char *) o->inmemory_objects->objects[i].value.buf); + free(o->inmemory_objects->objects); + free(o->inmemory_objects->base.path); + free(o->inmemory_objects); string_list_clear(&o->submodule_source_paths, 0); diff --git a/odb.h b/odb.h index 9eb8355aca..c3a7edf9c8 100644 --- a/odb.h +++ b/odb.h @@ -8,6 +8,7 @@ #include "thread-utils.h" struct cached_object_entry; +struct odb_source_inmemory; struct packed_git; struct repository; struct strbuf; @@ -80,8 +81,7 @@ struct object_database { * to write them into the object store (e.g. a browse-only * application). */ - struct cached_object_entry *cached_objects; - size_t cached_object_nr, cached_object_alloc; + struct odb_source_inmemory *inmemory_objects; /* * A fast, rough count of the number of objects in the repository. 
diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c new file mode 100644 index 0000000000..c7ac5c24f0 --- /dev/null +++ b/odb/source-inmemory.c @@ -0,0 +1,12 @@ +#include "git-compat-util.h" +#include "odb/source-inmemory.h" + +struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) +{ + struct odb_source_inmemory *source; + + CALLOC_ARRAY(source, 1); + odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); + + return source; +} diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h new file mode 100644 index 0000000000..15db068ef7 --- /dev/null +++ b/odb/source-inmemory.h @@ -0,0 +1,35 @@ +#ifndef ODB_SOURCE_INMEMORY_H +#define ODB_SOURCE_INMEMORY_H + +#include "odb/source.h" + +struct cached_object_entry; + +/* + * An in-memory source that you can write objects to that shall be made + * available for reading, but that shouldn't ever be persisted to disk. Note + * that any objects written to this source will be stored in memory, so the + * number of objects you can store is limited by available system memory. + */ +struct odb_source_inmemory { + struct odb_source base; + + struct cached_object_entry *objects; + size_t objects_nr, objects_alloc; +}; + +/* Create a new in-memory object database source. */ +struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb); + +/* + * Cast the given object database source to the in-memory backend. This will + * cause a BUG in case the source doesn't use this backend. 
+ */ +static inline struct odb_source_inmemory *odb_source_inmemory_downcast(struct odb_source *source) +{ + if (source->type != ODB_SOURCE_INMEMORY) + BUG("trying to downcast source of type '%d' to in-memory", source->type); + return container_of(source, struct odb_source_inmemory, base); +} + +#endif diff --git a/odb/source.h b/odb/source.h index f706e0608a..0a440884e4 100644 --- a/odb/source.h +++ b/odb/source.h @@ -13,6 +13,9 @@ enum odb_source_type { /* The "files" backend that uses loose objects and packfiles. */ ODB_SOURCE_FILES, + + /* The "in-memory" backend that stores objects in memory. */ + ODB_SOURCE_INMEMORY, }; struct object_id; From 8caa2e090f1b83df7c0fc82ed7f7c8772f3ec5f4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:32 +0200 Subject: [PATCH 02/17] odb/source-inmemory: implement `free()` callback Implement the `free()` callback function for the "in-memory" source. Note that this requires us to define `struct cached_object_entry` in "odb/source-inmemory.h", as it is accessed in both "odb.c" and "odb/source-inmemory.c" now. This will be fixed in subsequent commits though. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 25 ++++--------------------- odb/source-inmemory.c | 12 ++++++++++++ odb/source-inmemory.h | 9 ++++++++- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/odb.c b/odb.c index 60e1eead25..1d65825ed3 100644 --- a/odb.c +++ b/odb.c @@ -32,21 +32,6 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct odb_source *, 1, fspathhash, fspatheq) -/* - * This is meant to hold a *small* number of objects that you would - * want odb_read_object() to be able to return, but yet you do not want - * to write them into the object store (e.g. a browse-only - * application). 
- */ -struct cached_object_entry { - struct object_id oid; - struct cached_object { - enum object_type type; - const void *buf; - unsigned long size; - } value; -}; - static const struct cached_object *find_cached_object(struct object_database *object_store, const struct object_id *oid) { @@ -1109,6 +1094,10 @@ static void odb_free_sources(struct object_database *o) odb_source_free(o->sources); o->sources = next; } + + odb_source_free(&o->inmemory_objects->base); + o->inmemory_objects = NULL; + kh_destroy_odb_path_map(o->source_by_path); o->source_by_path = NULL; } @@ -1126,12 +1115,6 @@ void odb_free(struct object_database *o) odb_close(o); odb_free_sources(o); - for (size_t i = 0; i < o->inmemory_objects->objects_nr; i++) - free((char *) o->inmemory_objects->objects[i].value.buf); - free(o->inmemory_objects->objects); - free(o->inmemory_objects->base.path); - free(o->inmemory_objects); - string_list_clear(&o->submodule_source_paths, 0); free(o); diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index c7ac5c24f0..ccbb622eae 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,6 +1,16 @@ #include "git-compat-util.h" #include "odb/source-inmemory.h" +static void odb_source_inmemory_free(struct odb_source *source) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + for (size_t i = 0; i < inmemory->objects_nr; i++) + free((char *) inmemory->objects[i].value.buf); + free(inmemory->objects); + free(inmemory->base.path); + free(inmemory); +} + struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) { struct odb_source_inmemory *source; @@ -8,5 +18,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) CALLOC_ARRAY(source, 1); odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); + source->base.free = odb_source_inmemory_free; + return source; } diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h index 15db068ef7..d1b05a3996 100644 
--- a/odb/source-inmemory.h +++ b/odb/source-inmemory.h @@ -3,7 +3,14 @@ #include "odb/source.h" -struct cached_object_entry; +struct cached_object_entry { + struct object_id oid; + struct cached_object { + enum object_type type; + const void *buf; + unsigned long size; + } value; +}; /* * An in-memory source that you can write objects to that shall be made From 87de1b31e04fc5ce4f47c2a8dbfdc90b25e5bdbe Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:33 +0200 Subject: [PATCH 03/17] odb: fix unnecessary call to `find_cached_object()` The function `odb_pretend_object()` writes an object into the in-memory object database source. The effect of this is that the object will now become readable, but it won't ever be persisted to disk. Before storing the object, we first verify whether the object already exists. This is done by calling `odb_has_object()` to check all sources, followed by `find_cached_object()` to check whether we have already stored the object in our in-memory source. This is unnecessary though, as `odb_has_object()` already checks the in-memory source transitively via: - `odb_has_object()` - `odb_read_object_info_extended()` - `do_oid_object_info_extended()` - `find_cached_object()` Drop the explicit call to `find_cached_object()`. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/odb.c b/odb.c index 1d65825ed3..ea3fcf5e11 100644 --- a/odb.c +++ b/odb.c @@ -774,8 +774,7 @@ int odb_pretend_object(struct object_database *odb, char *co_buf; hash_object_file(odb->repo->hash_algo, buf, len, type, oid); - if (odb_has_object(odb, oid, 0) || - find_cached_object(odb, oid)) + if (odb_has_object(odb, oid, 0)) return 0; ALLOC_GROW(odb->inmemory_objects->objects, From ec45c1e8bf8958bdcca2b324573d02ac934c51ea Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:34 +0200 Subject: [PATCH 04/17] odb/source-inmemory: implement `read_object_info()` callback Implement the `read_object_info()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 39 +------------------------------ odb/source-inmemory.c | 53 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/odb.c b/odb.c index ea3fcf5e11..6a3912adac 100644 --- a/odb.c +++ b/odb.c @@ -32,25 +32,6 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct odb_source *, 1, fspathhash, fspatheq) -static const struct cached_object *find_cached_object(struct object_database *object_store, - const struct object_id *oid) -{ - static const struct cached_object empty_tree = { - .type = OBJ_TREE, - .buf = "", - }; - const struct cached_object_entry *co = object_store->inmemory_objects->objects; - - for (size_t i = 0; i < object_store->inmemory_objects->objects_nr; i++, co++) - if (oideq(&co->oid, oid)) - return &co->value; - - if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) - return &empty_tree; - - return NULL; -} - int odb_mkstemp(struct object_database *odb, struct strbuf *temp_filename, const char *pattern) { @@ -570,7 +551,6 @@ static int do_oid_object_info_extended(struct object_database *odb, const struct 
object_id *oid, struct object_info *oi, unsigned flags) { - const struct cached_object *co; const struct object_id *real = oid; int already_retried = 0; @@ -580,25 +560,8 @@ static int do_oid_object_info_extended(struct object_database *odb, if (is_null_oid(real)) return -1; - co = find_cached_object(odb, real); - if (co) { - if (oi) { - if (oi->typep) - *(oi->typep) = co->type; - if (oi->sizep) - *(oi->sizep) = co->size; - if (oi->disk_sizep) - *(oi->disk_sizep) = 0; - if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, odb->repo->hash_algo); - if (oi->contentp) - *oi->contentp = xmemdupz(co->buf, co->size); - if (oi->mtimep) - *oi->mtimep = 0; - oi->whence = OI_CACHED; - } + if (!odb_source_read_object_info(&odb->inmemory_objects->base, oid, oi, flags)) return 0; - } odb_prepare_alternates(odb); diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index ccbb622eae..12c80f9b34 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,5 +1,57 @@ #include "git-compat-util.h" +#include "odb.h" #include "odb/source-inmemory.h" +#include "repository.h" + +static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, + const struct object_id *oid) +{ + static const struct cached_object empty_tree = { + .type = OBJ_TREE, + .buf = "", + }; + const struct cached_object_entry *co = source->objects; + + for (size_t i = 0; i < source->objects_nr; i++, co++) + if (oideq(&co->oid, oid)) + return &co->value; + + if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) + return &empty_tree; + + return NULL; +} + +static int odb_source_inmemory_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags UNUSED) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + const struct cached_object *object; + + object = find_cached_object(inmemory, oid); + if (!object) + return -1; + + if (oi) { + if (oi->typep) + *(oi->typep) = 
object->type; + if (oi->sizep) + *(oi->sizep) = object->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); + if (oi->contentp) + *oi->contentp = xmemdupz(object->buf, object->size); + if (oi->mtimep) + *oi->mtimep = 0; + oi->whence = OI_CACHED; + } + + return 0; +} static void odb_source_inmemory_free(struct odb_source *source) { @@ -19,6 +71,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); source->base.free = odb_source_inmemory_free; + source->base.read_object_info = odb_source_inmemory_read_object_info; return source; } From 8d9c1e421ce36be06ff304ce166593cf2e4ef66f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:35 +0200 Subject: [PATCH 05/17] odb/source-inmemory: implement `read_object_stream()` callback Implement the `read_object_stream()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 12c80f9b34..39f0e799c7 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,6 +1,7 @@ #include "git-compat-util.h" #include "odb.h" #include "odb/source-inmemory.h" +#include "odb/streaming.h" #include "repository.h" static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, @@ -53,6 +54,56 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, return 0; } +struct odb_read_stream_inmemory { + struct odb_read_stream base; + const unsigned char *buf; + size_t offset; +}; + +static ssize_t odb_read_stream_inmemory_read(struct odb_read_stream *stream, + char *buf, size_t buf_len) +{ + struct odb_read_stream_inmemory *inmemory = + container_of(stream, struct odb_read_stream_inmemory, base); + size_t bytes = buf_len; + + if (buf_len > inmemory->base.size - inmemory->offset) + bytes = inmemory->base.size - inmemory->offset; + + memcpy(buf, inmemory->buf + inmemory->offset, bytes); + inmemory->offset += bytes; + + return bytes; +} + +static int odb_read_stream_inmemory_close(struct odb_read_stream *stream UNUSED) +{ + return 0; +} + +static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct odb_read_stream_inmemory *stream; + const struct cached_object *object; + + object = find_cached_object(inmemory, oid); + if (!object) + return -1; + + CALLOC_ARRAY(stream, 1); + stream->base.read = odb_read_stream_inmemory_read; + stream->base.close = odb_read_stream_inmemory_close; + stream->base.size = object->size; + stream->base.type = object->type; + stream->buf = object->buf; + + *out = &stream->base; + 
return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -72,6 +123,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; + source->base.read_object_stream = odb_source_inmemory_read_object_stream; return source; } From f611f4ba41de07a89649c74c01477cf55b20bc31 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:36 +0200 Subject: [PATCH 06/17] odb/source-inmemory: implement `write_object()` callback Implement the `write_object()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 16 ++-------------- odb/source-inmemory.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index 6a3912adac..24e929f03c 100644 --- a/odb.c +++ b/odb.c @@ -733,24 +733,12 @@ int odb_pretend_object(struct object_database *odb, void *buf, unsigned long len, enum object_type type, struct object_id *oid) { - struct cached_object_entry *co; - char *co_buf; - hash_object_file(odb->repo->hash_algo, buf, len, type, oid); if (odb_has_object(odb, oid, 0)) return 0; - ALLOC_GROW(odb->inmemory_objects->objects, - odb->inmemory_objects->objects_nr + 1, - odb->inmemory_objects->objects_alloc); - co = &odb->inmemory_objects->objects[odb->inmemory_objects->objects_nr++]; - co->value.size = len; - co->value.type = type; - co_buf = xmalloc(len); - memcpy(co_buf, buf, len); - co->value.buf = co_buf; - oidcpy(&co->oid, oid); - return 0; + return odb_source_write_object(&odb->inmemory_objects->base, + buf, len, type, oid, NULL, 0); } void *odb_read_object(struct object_database *odb, diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 39f0e799c7..4848011df5 100644 --- a/odb/source-inmemory.c +++ 
b/odb/source-inmemory.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "object-file.h" #include "odb.h" #include "odb/source-inmemory.h" #include "odb/streaming.h" @@ -104,6 +105,29 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, return 0; } +static int odb_source_inmemory_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid UNUSED, + enum odb_write_object_flags flags UNUSED) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct cached_object_entry *object; + + hash_object_file(source->odb->repo->hash_algo, buf, len, type, oid); + + ALLOC_GROW(inmemory->objects, inmemory->objects_nr + 1, + inmemory->objects_alloc); + object = &inmemory->objects[inmemory->objects_nr++]; + object->value.size = len; + object->value.type = type; + object->value.buf = xmemdupz(buf, len); + oidcpy(&object->oid, oid); + + return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -124,6 +148,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; + source->base.write_object = odb_source_inmemory_write_object; return source; } From 197c8a85e37720e54afda1ed92bf8b393cca92f1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:37 +0200 Subject: [PATCH 07/17] odb/source-inmemory: implement `write_object_stream()` callback Implement the `write_object_stream()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 4848011df5..d05a13df45 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -128,6 +128,45 @@ static int odb_source_inmemory_write_object(struct odb_source *source, return 0; } +static int odb_source_inmemory_write_object_stream(struct odb_source *source, + struct odb_write_stream *stream, + size_t len, + struct object_id *oid) +{ + char buf[16384]; + size_t total_read = 0; + char *data; + int ret; + + CALLOC_ARRAY(data, len); + while (!stream->is_finished) { + ssize_t bytes_read; + + bytes_read = odb_write_stream_read(stream, buf, sizeof(buf)); + if (total_read + bytes_read > len) { + ret = error("object stream yielded more bytes than expected"); + goto out; + } + + memcpy(data + total_read, buf, bytes_read); + total_read += bytes_read; + } + + if (total_read != len) { + ret = error("object stream yielded less bytes than expected"); + goto out; + } + + ret = odb_source_inmemory_write_object(source, data, len, OBJ_BLOB, oid, + NULL, 0); + if (ret < 0) + goto out; + +out: + free(data); + return ret; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -149,6 +188,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.write_object = odb_source_inmemory_write_object; + source->base.write_object_stream = odb_source_inmemory_write_object_stream; return source; } From 550d7b7c89a9cf80794c72e8c7d036164a5b1927 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:38 +0200 Subject: [PATCH 08/17] cbtree: allow using arbitrary 
wrapper structures for nodes The cbtree subsystem allows the user to store arbitrary data in a prefix-free set of strings. This is used by us to store object IDs in a way that we can easily iterate through them in lexicographic order, and so that we can easily perform lookups with shortened object IDs. In its current form, it is not easily possible to store arbitrary data with the tree nodes. There are a couple of approaches such a caller could try to use, but none of them really work: - One may embed the `struct cb_node` in a custom structure. This does not work though as `struct cb_node` contains a flex array, and embedding such a struct in another struct is forbidden. - One may use a `union` over `struct cb_node` and one's own data type, which _is_ allowed even if the struct contains a flex array. This does not work though, as the compiler may align members of the struct so that the node key would not immediately start where the flex array starts. - One may allocate `struct cb_node` such that it has room for both its key and the custom data. This has the downside though that if the custom data is itself a pointer to allocated memory, then the leak checker will not consider the pointer to be alive anymore. Refactor the cbtree to drop the flex array and instead take in an explicit offset for where to find the key, which allows the caller to embed `struct cb_node` in a wrapper struct. Note that this change has the downside that we now have a bit of padding in our structure, which grows the size from 60 to 64 bytes on a 64 bit system. On the other hand though, it allows us to get rid of the memory copies that we previously had to do to ensure proper alignment. This seems like a reasonable tradeoff. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- cbtree.c | 25 ++++++++++++++++++------- cbtree.h | 17 +++++++++-------- oidtree.c | 33 ++++++++++++++------------------- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/cbtree.c b/cbtree.c index 4ab794bddc..8f5edbb80a 100644 --- a/cbtree.c +++ b/cbtree.c @@ -7,6 +7,11 @@ #include "git-compat-util.h" #include "cbtree.h" +static inline uint8_t *cb_node_key(struct cb_tree *t, struct cb_node *node) +{ + return (uint8_t *) node + t->key_offset; +} + static struct cb_node *cb_node_of(const void *p) { return (struct cb_node *)((uintptr_t)p - 1); @@ -33,6 +38,7 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) uint8_t c; int newdirection; struct cb_node **wherep, *p; + uint8_t *node_key, *p_key; assert(!((uintptr_t)node & 1)); /* allocations must be aligned */ @@ -41,23 +47,26 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) return NULL; /* success */ } + node_key = cb_node_key(t, node); + /* see if a node already exists */ - p = cb_internal_best_match(t->root, node->k, klen); + p = cb_internal_best_match(t->root, node_key, klen); + p_key = cb_node_key(t, p); /* find first differing byte */ for (newbyte = 0; newbyte < klen; newbyte++) { - if (p->k[newbyte] != node->k[newbyte]) + if (p_key[newbyte] != node_key[newbyte]) goto different_byte_found; } return p; /* element exists, let user deal with it */ different_byte_found: - newotherbits = p->k[newbyte] ^ node->k[newbyte]; + newotherbits = p_key[newbyte] ^ node_key[newbyte]; newotherbits |= newotherbits >> 1; newotherbits |= newotherbits >> 2; newotherbits |= newotherbits >> 4; newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; - c = p->k[newbyte]; + c = p_key[newbyte]; newdirection = (1 + (newotherbits | c)) >> 8; node->byte = newbyte; @@ -78,7 +87,7 @@ different_byte_found: break; if (q->byte == newbyte && q->otherbits > newotherbits) break; - c = q->byte < klen ? 
node->k[q->byte] : 0; + c = q->byte < klen ? node_key[q->byte] : 0; direction = (1 + (q->otherbits | c)) >> 8; wherep = q->child + direction; } @@ -93,7 +102,7 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) { struct cb_node *p = cb_internal_best_match(t->root, k, klen); - return p && !memcmp(p->k, k, klen) ? p : NULL; + return p && !memcmp(cb_node_key(t, p), k, klen) ? p : NULL; } static int cb_descend(struct cb_node *p, cb_iter fn, void *arg) @@ -115,6 +124,7 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, struct cb_node *p = t->root; struct cb_node *top = p; size_t i = 0; + uint8_t *p_key; if (!p) return 0; /* empty tree */ @@ -130,8 +140,9 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, top = p; } + p_key = cb_node_key(t, p); for (i = 0; i < klen; i++) { - if (p->k[i] != kpfx[i]) + if (p_key[i] != kpfx[i]) return 0; /* "best" match failed */ } diff --git a/cbtree.h b/cbtree.h index c374b1b3db..4647d4a32f 100644 --- a/cbtree.h +++ b/cbtree.h @@ -6,9 +6,9 @@ * * This is adapted to store arbitrary data (not just NUL-terminated C strings * and allocates no memory internally. The user needs to allocate - * "struct cb_node" and fill cb_node.k[] with arbitrary match data - * for memcmp. - * If "klen" is variable, then it should be embedded into "c_node.k[]" + * "struct cb_node" and provide `key_offset` to indicate where the key can be + * found relative to the `struct cb_node` for memcmp. + * If "klen" is variable, then it should be embedded into the key. * Recursion is bound by the maximum value of "klen" used. 
*/ #ifndef CBTREE_H @@ -23,18 +23,19 @@ struct cb_node { */ uint32_t byte; uint8_t otherbits; - uint8_t k[FLEX_ARRAY]; /* arbitrary data, unaligned */ }; struct cb_tree { struct cb_node *root; + ptrdiff_t key_offset; }; -#define CBTREE_INIT { 0 } - -static inline void cb_init(struct cb_tree *t) +static inline void cb_init(struct cb_tree *t, + ptrdiff_t key_offset) { - struct cb_tree blank = CBTREE_INIT; + struct cb_tree blank = { + .key_offset = key_offset, + }; memcpy(t, &blank, sizeof(*t)); } diff --git a/oidtree.c b/oidtree.c index ab9fe7ec7a..117649753f 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,9 +6,14 @@ #include "oidtree.h" #include "hash.h" +struct oidtree_node { + struct cb_node base; + struct object_id key; +}; + void oidtree_init(struct oidtree *ot) { - cb_init(&ot->tree); + cb_init(&ot->tree, offsetof(struct oidtree_node, key)); mem_pool_init(&ot->mem_pool, 0); } @@ -22,20 +27,13 @@ void oidtree_clear(struct oidtree *ot) void oidtree_insert(struct oidtree *ot, const struct object_id *oid) { - struct cb_node *on; - struct object_id k; + struct oidtree_node *on; if (!oid->algo) BUG("oidtree_insert requires oid->algo"); - on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid)); - - /* - * Clear the padding and copy the result in separate steps to - * respect the 4-byte alignment needed by struct object_id. - */ - oidcpy(&k, oid); - memcpy(on->k, &k, sizeof(k)); + on = mem_pool_alloc(&ot->mem_pool, sizeof(*on)); + oidcpy(&on->key, oid); /* * n.b. Current callers won't get us duplicates, here. If a @@ -43,7 +41,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) * that won't be freed until oidtree_clear. 
Currently it's not * worth maintaining a free list */ - cb_insert(&ot->tree, on, sizeof(*oid)); + cb_insert(&ot->tree, &on->base, sizeof(*oid)); } bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) @@ -73,21 +71,18 @@ struct oidtree_each_data { static int iter(struct cb_node *n, void *cb_data) { + struct oidtree_node *node = container_of(n, struct oidtree_node, base); struct oidtree_each_data *data = cb_data; - struct object_id k; - /* Copy to provide 4-byte alignment needed by struct object_id. */ - memcpy(&k, n->k, sizeof(k)); - - if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) + if (data->algo != GIT_HASH_UNKNOWN && data->algo != node->key.algo) return 0; if (data->last_nibble_at) { - if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) + if ((node->key.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) return 0; } - return data->cb(&k, data->cb_data); + return data->cb(&node->key, data->cb_data); } int oidtree_each(struct oidtree *ot, const struct object_id *prefix, From 449650decf49b1fe5b1dac1c48dfb919e9b57b0d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:39 +0200 Subject: [PATCH 09/17] oidtree: add ability to store data The oidtree data structure is currently only used to store object IDs, without any associated data. So consequently, it can only really be used to track which object IDs exist, and we can use the tree structure to efficiently operate on OID prefixes. But there are valid use cases where we want to both: - Store object IDs in a sorted order. - Associate arbitrary data with them. Refactor the oidtree interface so that it allows us to store arbitrary payloads within the respective nodes. This will be used in the next commit. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 2 +- object-file.c | 3 ++- oidtree.c | 37 ++++++++++++++++++++++++++++++++----- oidtree.h | 12 ++++++++++-- t/unit-tests/u-oidtree.c | 26 +++++++++++++++++++++++--- 5 files changed, 68 insertions(+), 12 deletions(-) diff --git a/loose.c b/loose.c index 07333be696..f7a3dd1a72 100644 --- a/loose.c +++ b/loose.c @@ -57,7 +57,7 @@ static int insert_loose_map(struct odb_source *source, inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(files->loose->cache, compat_oid); + oidtree_insert(files->loose->cache, compat_oid, NULL); return inserted; } diff --git a/object-file.c b/object-file.c index 7b1a12f8eb..8705251e4d 100644 --- a/object-file.c +++ b/object-file.c @@ -1858,6 +1858,7 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *node_data UNUSED, void *cb_data) { struct for_each_object_wrapper_data *data = cb_data; @@ -2003,7 +2004,7 @@ static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) { - oidtree_insert(data, oid); + oidtree_insert(data, oid, NULL); return 0; } diff --git a/oidtree.c b/oidtree.c index 117649753f..e43f18026e 100644 --- a/oidtree.c +++ b/oidtree.c @@ -9,6 +9,7 @@ struct oidtree_node { struct cb_node base; struct object_id key; + void *data; }; void oidtree_init(struct oidtree *ot) @@ -25,15 +26,22 @@ void oidtree_clear(struct oidtree *ot) } } -void oidtree_insert(struct oidtree *ot, const struct object_id *oid) +struct oidtree_data { + struct object_id oid; +}; + +void oidtree_insert(struct oidtree *ot, const struct object_id *oid, + void *data) { struct oidtree_node *on; + struct cb_node *node; if (!oid->algo) BUG("oidtree_insert requires oid->algo"); on = mem_pool_alloc(&ot->mem_pool, sizeof(*on)); oidcpy(&on->key, oid); + 
on->data = data; /* * n.b. Current callers won't get us duplicates, here. If a @@ -41,13 +49,19 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) * that won't be freed until oidtree_clear. Currently it's not * worth maintaining a free list */ - cb_insert(&ot->tree, &on->base, sizeof(*oid)); + node = cb_insert(&ot->tree, &on->base, sizeof(*oid)); + if (node) { + struct oidtree_node *preexisting = container_of(node, struct oidtree_node, base); + preexisting->data = data; + } } -bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) +static struct oidtree_node *oidtree_lookup(struct oidtree *ot, + const struct object_id *oid) { struct object_id k; size_t klen = sizeof(k); + struct cb_node *node; oidcpy(&k, oid); @@ -58,7 +72,20 @@ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < offsetof(struct object_id, algo)); - return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen); + node = cb_lookup(&ot->tree, (const uint8_t *)&k, klen); + return node ? container_of(node, struct oidtree_node, base) : NULL; +} + +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *node = oidtree_lookup(ot, oid); + return node ? 1 : 0; +} + +void *oidtree_get(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *node = oidtree_lookup(ot, oid); + return node ? node->data : NULL; } struct oidtree_each_data { @@ -82,7 +109,7 @@ static int iter(struct cb_node *n, void *cb_data) return 0; } - return data->cb(&node->key, data->cb_data); + return data->cb(&node->key, node->data, data->cb_data); } int oidtree_each(struct oidtree *ot, const struct object_id *prefix, diff --git a/oidtree.h b/oidtree.h index 2b7bad2e60..baa5a436ea 100644 --- a/oidtree.h +++ b/oidtree.h @@ -29,18 +29,26 @@ void oidtree_init(struct oidtree *ot); */ void oidtree_clear(struct oidtree *ot); -/* Insert the object ID into the tree. 
*/ -void oidtree_insert(struct oidtree *ot, const struct object_id *oid); +/* + * Insert the object ID into the tree and store the given pointer alongside + * with it. The data pointer of any preexisting entry will be overwritten. + */ +void oidtree_insert(struct oidtree *ot, const struct object_id *oid, + void *data); /* Check whether the tree contains the given object ID. */ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); +/* Get the payload stored with the given object ID. */ +void *oidtree_get(struct oidtree *ot, const struct object_id *oid); + /* * Callback function used for `oidtree_each()`. Returning a non-zero exit code * will cause iteration to stop. The exit code will be propagated to the caller * of `oidtree_each()`. */ typedef int (*oidtree_each_cb)(const struct object_id *oid, + void *node_data, void *cb_data); /* diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index d4d05c7dc3..f0d5ebb733 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -19,7 +19,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n) for (size_t i = 0; i < n; i++) { struct object_id oid; cl_parse_any_oid(hexes[i], &oid); - oidtree_insert(ot, &oid); + oidtree_insert(ot, &oid, NULL); } return 0; } @@ -38,9 +38,9 @@ struct expected_hex_iter { const char *query; }; -static int check_each_cb(const struct object_id *oid, void *data) +static int check_each_cb(const struct object_id *oid, void *node_data UNUSED, void *cb_data) { - struct expected_hex_iter *hex_iter = data; + struct expected_hex_iter *hex_iter = cb_data; struct object_id expected; cl_assert(hex_iter->i < hex_iter->expected_hexes.nr); @@ -105,3 +105,23 @@ void test_oidtree__each(void) check_each(&ot, "32100", "321", NULL); check_each(&ot, "32", "320", "321", NULL); } + +void test_oidtree__insert_overwrites_data(void) +{ + struct object_id oid; + struct oidtree ot; + int a, b; + + cl_parse_any_oid("1", &oid); + + oidtree_init(&ot); + + 
oidtree_insert(&ot, &oid, NULL); + cl_assert_equal_p(oidtree_get(&ot, &oid), NULL); + oidtree_insert(&ot, &oid, &a); + cl_assert_equal_p(oidtree_get(&ot, &oid), &a); + oidtree_insert(&ot, &oid, &b); + cl_assert_equal_p(oidtree_get(&ot, &oid), &b); + + oidtree_clear(&ot); +} From c04907694601556de0ce862ad4f80fc55ec38c62 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:40 +0200 Subject: [PATCH 10/17] odb/source-inmemory: convert to use oidtree The in-memory source stores its objects in a simple array that we grow as needed. This has a couple of downsides: - The object lookup is O(n). This doesn't matter in practice because we only store a small number of objects. - We don't have an easy way to iterate over all objects in lexicographic order. - We don't have an easy way to compute unique object ID prefixes. Refactor the code to use an oidtree instead. This is the same data structure used by our loose object source, and thus it means we get a bunch of functionality for free. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 72 +++++++++++++++++++++++++++++++------------ odb/source-inmemory.h | 13 ++------ 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index d05a13df45..3b51cc7fef 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -3,20 +3,29 @@ #include "odb.h" #include "odb/source-inmemory.h" #include "odb/streaming.h" +#include "oidtree.h" #include "repository.h" -static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, - const struct object_id *oid) +struct inmemory_object { + enum object_type type; + const void *buf; + unsigned long size; +}; + +static const struct inmemory_object *find_cached_object(struct odb_source_inmemory *source, + const struct object_id *oid) { - static const struct cached_object empty_tree = { + static const struct inmemory_object empty_tree = { .type = OBJ_TREE, .buf = "", }; - const struct cached_object_entry *co = source->objects; + const struct inmemory_object *object; - for (size_t i = 0; i < source->objects_nr; i++, co++) - if (oideq(&co->oid, oid)) - return &co->value; + if (source->objects) { + object = oidtree_get(source->objects, oid); + if (object) + return object; + } if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) return &empty_tree; @@ -30,7 +39,7 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, enum object_info_flags flags UNUSED) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - const struct cached_object *object; + const struct inmemory_object *object; object = find_cached_object(inmemory, oid); if (!object) @@ -88,7 +97,7 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); struct odb_read_stream_inmemory *stream; - const struct cached_object *object; + const 
struct inmemory_object *object; object = find_cached_object(inmemory, oid); if (!object) @@ -113,17 +122,23 @@ static int odb_source_inmemory_write_object(struct odb_source *source, enum odb_write_object_flags flags UNUSED) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - struct cached_object_entry *object; + struct inmemory_object *object; hash_object_file(source->odb->repo->hash_algo, buf, len, type, oid); - ALLOC_GROW(inmemory->objects, inmemory->objects_nr + 1, - inmemory->objects_alloc); - object = &inmemory->objects[inmemory->objects_nr++]; - object->value.size = len; - object->value.type = type; - object->value.buf = xmemdupz(buf, len); - oidcpy(&object->oid, oid); + if (!inmemory->objects) { + CALLOC_ARRAY(inmemory->objects, 1); + oidtree_init(inmemory->objects); + } else if (oidtree_contains(inmemory->objects, oid)) { + return 0; + } + + CALLOC_ARRAY(object, 1); + object->size = len; + object->type = type; + object->buf = xmemdupz(buf, len); + + oidtree_insert(inmemory->objects, oid, object); return 0; } @@ -167,12 +182,29 @@ out: return ret; } +static int inmemory_object_free(const struct object_id *oid UNUSED, + void *node_data, + void *cb_data UNUSED) +{ + struct inmemory_object *object = node_data; + free((void *) object->buf); + free(object); + return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - for (size_t i = 0; i < inmemory->objects_nr; i++) - free((char *) inmemory->objects[i].value.buf); - free(inmemory->objects); + + if (inmemory->objects) { + struct object_id null_oid = { 0 }; + + oidtree_each(inmemory->objects, &null_oid, 0, + inmemory_object_free, NULL); + oidtree_clear(inmemory->objects); + free(inmemory->objects); + } + free(inmemory->base.path); free(inmemory); } diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h index d1b05a3996..a88fc2e320 100644 --- a/odb/source-inmemory.h +++ 
b/odb/source-inmemory.h @@ -3,14 +3,7 @@ #include "odb/source.h" -struct cached_object_entry { - struct object_id oid; - struct cached_object { - enum object_type type; - const void *buf; - unsigned long size; - } value; -}; +struct oidtree; /* * An in-memory source that you can write objects to that shall be made @@ -20,9 +13,7 @@ struct cached_object_entry { */ struct odb_source_inmemory { struct odb_source base; - - struct cached_object_entry *objects; - size_t objects_nr, objects_alloc; + struct oidtree *objects; }; /* Create a new in-memory object database source. */ From 4babe3b673882adf853526475192ae7e3007877c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:41 +0200 Subject: [PATCH 11/17] odb/source-inmemory: implement `for_each_object()` callback Implement the `for_each_object()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 88 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 3b51cc7fef..f60eecbdbb 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -33,6 +33,28 @@ static const struct inmemory_object *find_cached_object(struct odb_source_inmemo return NULL; } +static void populate_object_info(struct odb_source_inmemory *source, + struct object_info *oi, + const struct inmemory_object *object) +{ + if (!oi) + return; + + if (oi->typep) + *(oi->typep) = object->type; + if (oi->sizep) + *(oi->sizep) = object->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid, source->base.odb->repo->hash_algo); + if (oi->contentp) + *oi->contentp = xmemdupz(object->buf, object->size); + if (oi->mtimep) + *oi->mtimep = 0; + oi->whence = OI_CACHED; +} + static int odb_source_inmemory_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, @@ 
-45,22 +67,7 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, if (!object) return -1; - if (oi) { - if (oi->typep) - *(oi->typep) = object->type; - if (oi->sizep) - *(oi->sizep) = object->size; - if (oi->disk_sizep) - *(oi->disk_sizep) = 0; - if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); - if (oi->contentp) - *oi->contentp = xmemdupz(object->buf, object->size); - if (oi->mtimep) - *oi->mtimep = 0; - oi->whence = OI_CACHED; - } - + populate_object_info(inmemory, oi, object); return 0; } @@ -114,6 +121,54 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, return 0; } +struct odb_source_inmemory_for_each_object_data { + struct odb_source_inmemory *inmemory; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int odb_source_inmemory_for_each_object_cb(const struct object_id *oid, + void *node_data, void *cb_data) +{ + struct odb_source_inmemory_for_each_object_data *data = cb_data; + struct inmemory_object *object = node_data; + + if (data->request) { + struct object_info oi = *data->request; + populate_object_info(data->inmemory, &oi, object); + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +static int odb_source_inmemory_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct odb_source_inmemory_for_each_object_data payload = { + .inmemory = inmemory, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + struct object_id null_oid = { 0 }; + + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) || + (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)) + return 0; + if (!inmemory->objects) + return 0; + + return 
oidtree_each(inmemory->objects, + opts->prefix ? opts->prefix : &null_oid, opts->prefix_hex_len, + odb_source_inmemory_for_each_object_cb, &payload); +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -219,6 +274,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; + source->base.for_each_object = odb_source_inmemory_for_each_object; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; From 3bd2856d3448943c4037d454f3e9cc0135330e73 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:42 +0200 Subject: [PATCH 12/17] odb/source-inmemory: implement `find_abbrev_len()` callback Implement the `find_abbrev_len()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index f60eecbdbb..44d9bbedec 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -169,6 +169,44 @@ static int odb_source_inmemory_for_each_object(struct odb_source *source, odb_source_inmemory_for_each_object_cb, &payload); } +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +static int odb_source_inmemory_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_inmemory_for_each_object(source, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -275,6 +313,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; + source->base.find_abbrev_len = odb_source_inmemory_find_abbrev_len; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; From 
27d219132afe13db43d9732caeb37a14c026e717 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:43 +0200 Subject: [PATCH 13/17] odb/source-inmemory: implement `count_objects()` callback Implement the `count_objects()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 44d9bbedec..674dbcad30 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -207,6 +207,25 @@ static int odb_source_inmemory_find_abbrev_len(struct odb_source *source, return ret; } +static int count_objects_cb(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *cb_data) +{ + unsigned long *counter = cb_data; + (*counter)++; + return 0; +} + +static int odb_source_inmemory_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags UNUSED, + unsigned long *out) +{ + struct odb_for_each_object_options opts = { 0 }; + *out = 0; + return odb_source_inmemory_for_each_object(source, NULL, count_objects_cb, + out, &opts); +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -314,6 +333,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; source->base.find_abbrev_len = odb_source_inmemory_find_abbrev_len; + source->base.count_objects = odb_source_inmemory_count_objects; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; From 7357196c49d537588d6c450fa3a902fac13cfbb9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:44 +0200 Subject: [PATCH 14/17] 
odb/source-inmemory: implement `freshen_object()` callback Implement the `freshen_object()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 674dbcad30..8934e0f547 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -294,6 +294,15 @@ out: return ret; } +static int odb_source_inmemory_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + if (find_cached_object(inmemory, oid)) + return 1; + return 0; +} + static int inmemory_object_free(const struct object_id *oid UNUSED, void *node_data, void *cb_data UNUSED) @@ -336,6 +345,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.count_objects = odb_source_inmemory_count_objects; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; + source->base.freshen_object = odb_source_inmemory_freshen_object; return source; } From 314fa0199ddc1a37069ab7c006a5b0bb8e72f45d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:45 +0200 Subject: [PATCH 15/17] odb/source-inmemory: stub out remaining functions Stub out remaining functions that we either don't need or that are basically no-ops. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 8934e0f547..e004566d76 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -303,6 +303,32 @@ static int odb_source_inmemory_freshen_object(struct odb_source *source, return 0; } +static int odb_source_inmemory_begin_transaction(struct odb_source *source UNUSED, + struct odb_transaction **out UNUSED) +{ + return error("in-memory source does not support transactions"); +} + +static int odb_source_inmemory_read_alternates(struct odb_source *source UNUSED, + struct strvec *out UNUSED) +{ + return 0; +} + +static int odb_source_inmemory_write_alternate(struct odb_source *source UNUSED, + const char *alternate UNUSED) +{ + return error("in-memory source does not support alternates"); +} + +static void odb_source_inmemory_close(struct odb_source *source UNUSED) +{ +} + +static void odb_source_inmemory_reprepare(struct odb_source *source UNUSED) +{ +} + static int inmemory_object_free(const struct object_id *oid UNUSED, void *node_data, void *cb_data UNUSED) @@ -338,6 +364,8 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); source->base.free = odb_source_inmemory_free; + source->base.close = odb_source_inmemory_close; + source->base.reprepare = odb_source_inmemory_reprepare; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; @@ -346,6 +374,9 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; 
source->base.freshen_object = odb_source_inmemory_freshen_object; + source->base.begin_transaction = odb_source_inmemory_begin_transaction; + source->base.read_alternates = odb_source_inmemory_read_alternates; + source->base.write_alternate = odb_source_inmemory_write_alternate; return source; } From fdf74cb2cab6a4a95fd6e7e589ac6a4508bf358f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:46 +0200 Subject: [PATCH 16/17] odb: generic in-memory source Make the in-memory source generic. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 8 ++++---- odb.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index 24e929f03c..965ef68e4e 100644 --- a/odb.c +++ b/odb.c @@ -560,7 +560,7 @@ static int do_oid_object_info_extended(struct object_database *odb, if (is_null_oid(real)) return -1; - if (!odb_source_read_object_info(&odb->inmemory_objects->base, oid, oi, flags)) + if (!odb_source_read_object_info(odb->inmemory_objects, oid, oi, flags)) return 0; odb_prepare_alternates(odb); @@ -737,7 +737,7 @@ int odb_pretend_object(struct object_database *odb, if (odb_has_object(odb, oid, 0)) return 0; - return odb_source_write_object(&odb->inmemory_objects->base, + return odb_source_write_object(odb->inmemory_objects, buf, len, type, oid, NULL, 0); } @@ -1020,7 +1020,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); - o->inmemory_objects = odb_source_inmemory_new(o); + o->inmemory_objects = &odb_source_inmemory_new(o)->base; free(to_free); @@ -1045,7 +1045,7 @@ static void odb_free_sources(struct object_database *o) o->sources = next; } - odb_source_free(&o->inmemory_objects->base); + odb_source_free(o->inmemory_objects); o->inmemory_objects = NULL; kh_destroy_odb_path_map(o->source_by_path); diff --git a/odb.h b/odb.h index 
c3a7edf9c8..73553ed5a7 100644 --- a/odb.h +++ b/odb.h @@ -81,7 +81,7 @@ struct object_database { * to write them into the object store (e.g. a browse-only * application). */ - struct odb_source_inmemory *inmemory_objects; + struct odb_source *inmemory_objects; /* * A fast, rough count of the number of objects in the repository. From d2902a45498793f8dc69abc6448f517b69437eec Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:47 +0200 Subject: [PATCH 17/17] t/unit-tests: add tests for the in-memory object source While the in-memory object source is a full-fledged source, our code base only exercises parts of its functionality because we only use it in git-blame(1). Implement unit tests to verify that the yet-unused functionality of the backend works as expected. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + t/meson.build | 1 + t/unit-tests/u-odb-inmemory.c | 313 ++++++++++++++++++++++++++++++++++ 3 files changed, 315 insertions(+) create mode 100644 t/unit-tests/u-odb-inmemory.c diff --git a/Makefile b/Makefile index 3cda12c455..68b4daa1ad 100644 --- a/Makefile +++ b/Makefile @@ -1529,6 +1529,7 @@ CLAR_TEST_SUITES += u-hash CLAR_TEST_SUITES += u-hashmap CLAR_TEST_SUITES += u-list-objects-filter-options CLAR_TEST_SUITES += u-mem-pool +CLAR_TEST_SUITES += u-odb-inmemory CLAR_TEST_SUITES += u-oid-array CLAR_TEST_SUITES += u-oidmap CLAR_TEST_SUITES += u-oidtree diff --git a/t/meson.build b/t/meson.build index 7528e5cda5..db5e01c49b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -6,6 +6,7 @@ clar_test_suites = [ 'unit-tests/u-hashmap.c', 'unit-tests/u-list-objects-filter-options.c', 'unit-tests/u-mem-pool.c', + 'unit-tests/u-odb-inmemory.c', 'unit-tests/u-oid-array.c', 'unit-tests/u-oidmap.c', 'unit-tests/u-oidtree.c', diff --git a/t/unit-tests/u-odb-inmemory.c b/t/unit-tests/u-odb-inmemory.c new file mode 100644 index 0000000000..482502ef4b --- /dev/null +++ b/t/unit-tests/u-odb-inmemory.c @@ -0,0 +1,313 
@@ +#include "unit-test.h" +#include "hex.h" +#include "odb/source-inmemory.h" +#include "odb/streaming.h" +#include "oidset.h" +#include "repository.h" +#include "strbuf.h" + +#define RANDOM_OID "da39a3ee5e6b4b0d3255bfef95601890afd80709" +#define FOOBAR_OID "f6ea0495187600e7b2288c8ac19c5886383a4632" + +static struct repository repo = { + .hash_algo = &hash_algos[GIT_HASH_SHA1], +}; +static struct object_database *odb; + +static void cl_assert_object_info(struct odb_source_inmemory *source, + const struct object_id *oid, + enum object_type expected_type, + const char *expected_content) +{ + enum object_type actual_type; + unsigned long actual_size; + void *actual_content; + struct object_info oi = { + .typep = &actual_type, + .sizep = &actual_size, + .contentp = &actual_content, + }; + + cl_must_pass(odb_source_read_object_info(&source->base, oid, &oi, 0)); + cl_assert_equal_u(actual_size, strlen(expected_content)); + cl_assert_equal_u(actual_type, expected_type); + cl_assert_equal_s((char *) actual_content, expected_content); + + free(actual_content); +} + +void test_odb_inmemory__initialize(void) +{ + odb = odb_new(&repo, "", ""); +} + +void test_odb_inmemory__cleanup(void) +{ + odb_free(odb); +} + +void test_odb_inmemory__new(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + cl_assert_equal_i(source->base.type, ODB_SOURCE_INMEMORY); + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_missing_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id oid; + const char *end; + + cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo)); + cl_must_fail(odb_source_read_object_info(&source->base, &oid, NULL, 0)); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_empty_tree(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + cl_assert_object_info(source, repo.hash_algo->empty_tree, OBJ_TREE, ""); + 
odb_source_free(&source->base); +} + +void test_odb_inmemory__read_written_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + const char data[] = "foobar"; + struct object_id written_oid; + + cl_must_pass(odb_source_write_object(&source->base, data, strlen(data), + OBJ_BLOB, &written_oid, NULL, 0)); + cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID); + cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar"); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_stream_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_read_stream *stream; + struct object_id written_oid; + const char data[] = "foobar"; + char buf[3] = { 0 }; + + cl_must_pass(odb_source_write_object(&source->base, data, strlen(data), + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_must_pass(odb_source_read_object_stream(&stream, &source->base, + &written_oid)); + cl_assert_equal_i(stream->type, OBJ_BLOB); + cl_assert_equal_u(stream->size, 6); + + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "fo"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "ob"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "ar"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 0); + + odb_read_stream_close(stream); + odb_source_free(&source->base); +} + +static int add_one_object(const struct object_id *oid, + struct object_info *oi UNUSED, + void *payload) +{ + struct oidset *actual_oids = payload; + cl_must_pass(oidset_insert(actual_oids, oid)); + return 0; +} + +void test_odb_inmemory__for_each_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_for_each_object_options opts = { 0 }; + struct oidset expected_oids = OIDSET_INIT; + struct oidset actual_oids = OIDSET_INIT; + struct strbuf buf = STRBUF_INIT; + + 
cl_must_pass(odb_source_for_each_object(&source->base, NULL, + add_one_object, &actual_oids, &opts)); + cl_assert_equal_u(oidset_size(&actual_oids), 0); + + for (int i = 0; i < 10; i++) { + struct object_id written_oid; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%d", i); + + cl_must_pass(odb_source_write_object(&source->base, buf.buf, buf.len, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(oidset_insert(&expected_oids, &written_oid)); + } + + cl_must_pass(odb_source_for_each_object(&source->base, NULL, + add_one_object, &actual_oids, &opts)); + cl_assert_equal_b(oidset_equal(&expected_oids, &actual_oids), true); + + odb_source_free(&source->base); + oidset_clear(&expected_oids); + oidset_clear(&actual_oids); + strbuf_release(&buf); +} + +static int abort_after_two_objects(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *payload) +{ + unsigned *counter = payload; + (*counter)++; + if (*counter == 2) + return 123; + return 0; +} + +void test_odb_inmemory__for_each_object_can_abort_iteration(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_for_each_object_options opts = { 0 }; + struct object_id written_oid; + unsigned counter = 0; + + cl_must_pass(odb_source_write_object(&source->base, "1", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "2", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "3", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_assert_equal_i(odb_source_for_each_object(&source->base, NULL, + abort_after_two_objects, + &counter, &opts), + 123); + cl_assert_equal_u(counter, 2); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__count_objects(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id written_oid; + unsigned long count; + + cl_must_pass(odb_source_count_objects(&source->base, 0, &count)); + cl_assert_equal_u(count, 0); + + 
cl_must_pass(odb_source_write_object(&source->base, "1", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "2", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "3", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_must_pass(odb_source_count_objects(&source->base, 0, &count)); + cl_assert_equal_u(count, 3); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__find_abbrev_len(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id oid1, oid2; + unsigned abbrev_len; + + /* + * The two blobs we're about to write share the first 10 hex characters + * of their object IDs ("a09f43dc45"), so at least 11 characters are + * needed to tell them apart: + * + * "368317" -> a09f43dc4562d45115583f5094640ae237df55f7 + * "514796" -> a09f43dc45fef837235eb7e6b1a6ca5e169a3981 + * + * With only one blob written we expect a length of 4. + */ + cl_must_pass(odb_source_write_object(&source->base, "368317", strlen("368317"), + OBJ_BLOB, &oid1, NULL, 0)); + cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4, + &abbrev_len)); + cl_assert_equal_u(abbrev_len, 4); + + /* + * With both objects present, the shared 10-character prefix means we + * need at least 11 characters to uniquely identify either object. 
+ */ + cl_must_pass(odb_source_write_object(&source->base, "514796", strlen("514796"), + OBJ_BLOB, &oid2, NULL, 0)); + cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4, + &abbrev_len)); + cl_assert_equal_u(abbrev_len, 11); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__freshen_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id written_oid; + struct object_id oid; + const char *end; + + cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo)); + cl_assert_equal_i(odb_source_freshen_object(&source->base, &oid), 0); + + cl_must_pass(odb_source_write_object(&source->base, "foobar", + strlen("foobar"), OBJ_BLOB, + &written_oid, NULL, 0)); + cl_assert_equal_i(odb_source_freshen_object(&source->base, + &written_oid), 1); + + odb_source_free(&source->base); +} + +struct membuf_write_stream { + struct odb_write_stream base; + const char *buf; + size_t offset; + size_t size; +}; + +static ssize_t membuf_write_stream_read(struct odb_write_stream *stream, + unsigned char *buf, size_t len) +{ + struct membuf_write_stream *s = container_of(stream, struct membuf_write_stream, base); + size_t chunk_size = 2; + + if (chunk_size > len) + chunk_size = len; + if (chunk_size > s->size - s->offset) + chunk_size = s->size - s->offset; + + memcpy(buf, s->buf + s->offset, chunk_size); + + s->offset += chunk_size; + if (s->offset == s->size) + s->base.is_finished = 1; + + return chunk_size; +} + +void test_odb_inmemory__write_object_stream(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + const char data[] = "foobar"; + struct membuf_write_stream stream = { + .base.read = membuf_write_stream_read, + .buf = data, + .size = strlen(data), + }; + struct object_id written_oid; + + cl_must_pass(odb_source_write_object_stream(&source->base, &stream.base, + strlen(data), &written_oid)); + cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID); + 
cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar"); + + odb_source_free(&source->base); +}