Merge branch 'ps/odb-in-memory' into next

Add a new odb "in-memory" source that is meant to only hold
tentative objects (like the virtual blob object that represents the
working tree file used by "git blame").

* ps/odb-in-memory:
  t/unit-tests: add tests for the in-memory object source
  odb: generic in-memory source
  odb/source-inmemory: stub out remaining functions
  odb/source-inmemory: implement `freshen_object()` callback
  odb/source-inmemory: implement `count_objects()` callback
  odb/source-inmemory: implement `find_abbrev_len()` callback
  odb/source-inmemory: implement `for_each_object()` callback
  odb/source-inmemory: convert to use oidtree
  oidtree: add ability to store data
  cbtree: allow using arbitrary wrapper structures for nodes
  odb/source-inmemory: implement `write_object_stream()` callback
  odb/source-inmemory: implement `write_object()` callback
  odb/source-inmemory: implement `read_object_stream()` callback
  odb/source-inmemory: implement `read_object_info()` callback
  odb: fix unnecessary call to `find_cached_object()`
  odb/source-inmemory: implement `free()` callback
  odb: introduce "in-memory" source
This commit is contained in:
Junio C Hamano
2026-05-21 13:39:24 +09:00
16 changed files with 854 additions and 118 deletions

View File

@@ -1216,6 +1216,7 @@ LIB_OBJS += object.o
LIB_OBJS += odb.o
LIB_OBJS += odb/source.o
LIB_OBJS += odb/source-files.o
LIB_OBJS += odb/source-inmemory.o
LIB_OBJS += odb/streaming.o
LIB_OBJS += odb/transaction.o
LIB_OBJS += oid-array.o
@@ -1526,6 +1527,7 @@ CLAR_TEST_SUITES += u-hash
CLAR_TEST_SUITES += u-hashmap
CLAR_TEST_SUITES += u-list-objects-filter-options
CLAR_TEST_SUITES += u-mem-pool
CLAR_TEST_SUITES += u-odb-inmemory
CLAR_TEST_SUITES += u-oid-array
CLAR_TEST_SUITES += u-oidmap
CLAR_TEST_SUITES += u-oidtree

View File

@@ -7,6 +7,11 @@
#include "git-compat-util.h"
#include "cbtree.h"
/*
 * Return a pointer to the key bytes belonging to `node`. The key lives
 * `t->key_offset` bytes away from the start of the node itself.
 */
static inline uint8_t *cb_node_key(struct cb_tree *t, struct cb_node *node)
{
	uint8_t *node_start = (uint8_t *) node;

	return node_start + t->key_offset;
}
static struct cb_node *cb_node_of(const void *p)
{
return (struct cb_node *)((uintptr_t)p - 1);
@@ -33,6 +38,7 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen)
uint8_t c;
int newdirection;
struct cb_node **wherep, *p;
uint8_t *node_key, *p_key;
assert(!((uintptr_t)node & 1)); /* allocations must be aligned */
@@ -41,23 +47,26 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen)
return NULL; /* success */
}
node_key = cb_node_key(t, node);
/* see if a node already exists */
p = cb_internal_best_match(t->root, node->k, klen);
p = cb_internal_best_match(t->root, node_key, klen);
p_key = cb_node_key(t, p);
/* find first differing byte */
for (newbyte = 0; newbyte < klen; newbyte++) {
if (p->k[newbyte] != node->k[newbyte])
if (p_key[newbyte] != node_key[newbyte])
goto different_byte_found;
}
return p; /* element exists, let user deal with it */
different_byte_found:
newotherbits = p->k[newbyte] ^ node->k[newbyte];
newotherbits = p_key[newbyte] ^ node_key[newbyte];
newotherbits |= newotherbits >> 1;
newotherbits |= newotherbits >> 2;
newotherbits |= newotherbits >> 4;
newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
c = p->k[newbyte];
c = p_key[newbyte];
newdirection = (1 + (newotherbits | c)) >> 8;
node->byte = newbyte;
@@ -78,7 +87,7 @@ different_byte_found:
break;
if (q->byte == newbyte && q->otherbits > newotherbits)
break;
c = q->byte < klen ? node->k[q->byte] : 0;
c = q->byte < klen ? node_key[q->byte] : 0;
direction = (1 + (q->otherbits | c)) >> 8;
wherep = q->child + direction;
}
@@ -93,7 +102,7 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen)
{
struct cb_node *p = cb_internal_best_match(t->root, k, klen);
return p && !memcmp(p->k, k, klen) ? p : NULL;
return p && !memcmp(cb_node_key(t, p), k, klen) ? p : NULL;
}
static int cb_descend(struct cb_node *p, cb_iter fn, void *arg)
@@ -115,6 +124,7 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
struct cb_node *p = t->root;
struct cb_node *top = p;
size_t i = 0;
uint8_t *p_key;
if (!p)
return 0; /* empty tree */
@@ -130,8 +140,9 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
top = p;
}
p_key = cb_node_key(t, p);
for (i = 0; i < klen; i++) {
if (p->k[i] != kpfx[i])
if (p_key[i] != kpfx[i])
return 0; /* "best" match failed */
}

View File

@@ -6,9 +6,9 @@
*
* This is adapted to store arbitrary data (not just NUL-terminated C strings)
* and allocates no memory internally. The user needs to allocate
* "struct cb_node" and fill cb_node.k[] with arbitrary match data
* for memcmp.
* If "klen" is variable, then it should be embedded into "c_node.k[]"
* "struct cb_node" and provide `key_offset` to indicate where the key can be
* found relative to the `struct cb_node` for memcmp.
* If "klen" is variable, then it should be embedded into the key.
* Recursion is bound by the maximum value of "klen" used.
*/
#ifndef CBTREE_H
@@ -23,18 +23,19 @@ struct cb_node {
*/
uint32_t byte;
uint8_t otherbits;
uint8_t k[FLEX_ARRAY]; /* arbitrary data, unaligned */
};
struct cb_tree {
struct cb_node *root;
ptrdiff_t key_offset;
};
#define CBTREE_INIT { 0 }
static inline void cb_init(struct cb_tree *t)
static inline void cb_init(struct cb_tree *t,
ptrdiff_t key_offset)
{
struct cb_tree blank = CBTREE_INIT;
struct cb_tree blank = {
.key_offset = key_offset,
};
memcpy(t, &blank, sizeof(*t));
}

View File

@@ -57,7 +57,7 @@ static int insert_loose_map(struct odb_source *source,
inserted |= insert_oid_pair(map->to_compat, oid, compat_oid);
inserted |= insert_oid_pair(map->to_storage, compat_oid, oid);
if (inserted)
oidtree_insert(files->loose->cache, compat_oid);
oidtree_insert(files->loose->cache, compat_oid, NULL);
return inserted;
}

View File

@@ -404,6 +404,7 @@ libgit_sources = [
'odb.c',
'odb/source.c',
'odb/source-files.c',
'odb/source-inmemory.c',
'odb/streaming.c',
'odb/transaction.c',
'oid-array.c',

View File

@@ -1858,6 +1858,7 @@ static int for_each_object_wrapper_cb(const struct object_id *oid,
}
static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid,
void *node_data UNUSED,
void *cb_data)
{
struct for_each_object_wrapper_data *data = cb_data;
@@ -2003,7 +2004,7 @@ static int append_loose_object(const struct object_id *oid,
const char *path UNUSED,
void *data)
{
oidtree_insert(data, oid);
oidtree_insert(data, oid, NULL);
return 0;
}

82
odb.c
View File

@@ -14,6 +14,7 @@
#include "object-file.h"
#include "object-name.h"
#include "odb.h"
#include "odb/source-inmemory.h"
#include "packfile.h"
#include "path.h"
#include "promisor-remote.h"
@@ -31,40 +32,6 @@
KHASH_INIT(odb_path_map, const char * /* key: odb_path */,
struct odb_source *, 1, fspathhash, fspatheq)
/*
* This is meant to hold a *small* number of objects that you would
* want odb_read_object() to be able to return, but yet you do not want
* to write them into the object store (e.g. a browse-only
* application).
*/
struct cached_object_entry {
struct object_id oid;
struct cached_object {
enum object_type type;
const void *buf;
unsigned long size;
} value;
};
static const struct cached_object *find_cached_object(struct object_database *object_store,
const struct object_id *oid)
{
static const struct cached_object empty_tree = {
.type = OBJ_TREE,
.buf = "",
};
const struct cached_object_entry *co = object_store->cached_objects;
for (size_t i = 0; i < object_store->cached_object_nr; i++, co++)
if (oideq(&co->oid, oid))
return &co->value;
if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree))
return &empty_tree;
return NULL;
}
int odb_mkstemp(struct object_database *odb,
struct strbuf *temp_filename, const char *pattern)
{
@@ -584,7 +551,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
const struct object_id *oid,
struct object_info *oi, unsigned flags)
{
const struct cached_object *co;
const struct object_id *real = oid;
int already_retried = 0;
@@ -594,25 +560,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
if (is_null_oid(real))
return -1;
co = find_cached_object(odb, real);
if (co) {
if (oi) {
if (oi->typep)
*(oi->typep) = co->type;
if (oi->sizep)
*(oi->sizep) = co->size;
if (oi->disk_sizep)
*(oi->disk_sizep) = 0;
if (oi->delta_base_oid)
oidclr(oi->delta_base_oid, odb->repo->hash_algo);
if (oi->contentp)
*oi->contentp = xmemdupz(co->buf, co->size);
if (oi->mtimep)
*oi->mtimep = 0;
oi->whence = OI_CACHED;
}
if (!odb_source_read_object_info(odb->inmemory_objects, oid, oi, flags))
return 0;
}
odb_prepare_alternates(odb);
@@ -784,24 +733,12 @@ int odb_pretend_object(struct object_database *odb,
void *buf, unsigned long len, enum object_type type,
struct object_id *oid)
{
struct cached_object_entry *co;
char *co_buf;
hash_object_file(odb->repo->hash_algo, buf, len, type, oid);
if (odb_has_object(odb, oid, 0) ||
find_cached_object(odb, oid))
if (odb_has_object(odb, oid, 0))
return 0;
ALLOC_GROW(odb->cached_objects,
odb->cached_object_nr + 1, odb->cached_object_alloc);
co = &odb->cached_objects[odb->cached_object_nr++];
co->value.size = len;
co->value.type = type;
co_buf = xmalloc(len);
memcpy(co_buf, buf, len);
co->value.buf = co_buf;
oidcpy(&co->oid, oid);
return 0;
return odb_source_write_object(odb->inmemory_objects,
buf, len, type, oid, NULL, 0);
}
void *odb_read_object(struct object_database *odb,
@@ -1083,6 +1020,7 @@ struct object_database *odb_new(struct repository *repo,
o->sources = odb_source_new(o, primary_source, true);
o->sources_tail = &o->sources->next;
o->alternate_db = xstrdup_or_null(secondary_sources);
o->inmemory_objects = &odb_source_inmemory_new(o)->base;
free(to_free);
@@ -1106,6 +1044,10 @@ static void odb_free_sources(struct object_database *o)
odb_source_free(o->sources);
o->sources = next;
}
odb_source_free(o->inmemory_objects);
o->inmemory_objects = NULL;
kh_destroy_odb_path_map(o->source_by_path);
o->source_by_path = NULL;
}
@@ -1123,10 +1065,6 @@ void odb_free(struct object_database *o)
odb_close(o);
odb_free_sources(o);
for (size_t i = 0; i < o->cached_object_nr; i++)
free((char *) o->cached_objects[i].value.buf);
free(o->cached_objects);
string_list_clear(&o->submodule_source_paths, 0);
free(o);

4
odb.h
View File

@@ -8,6 +8,7 @@
#include "thread-utils.h"
struct cached_object_entry;
struct odb_source_inmemory;
struct packed_git;
struct repository;
struct strbuf;
@@ -80,8 +81,7 @@ struct object_database {
* to write them into the object store (e.g. a browse-only
* application).
*/
struct cached_object_entry *cached_objects;
size_t cached_object_nr, cached_object_alloc;
struct odb_source *inmemory_objects;
/*
* A fast, rough count of the number of objects in the repository.

382
odb/source-inmemory.c Normal file
View File

@@ -0,0 +1,382 @@
#include "git-compat-util.h"
#include "object-file.h"
#include "odb.h"
#include "odb/source-inmemory.h"
#include "odb/streaming.h"
#include "oidtree.h"
#include "repository.h"
/* A single object held by the in-memory source. */
struct inmemory_object {
/* Object type as passed to the write callback. */
enum object_type type;
/* Object contents; released with free() when the source is freed. */
const void *buf;
/* Number of content bytes in `buf`. */
unsigned long size;
};
/*
 * Look up `oid` among the objects stored in `source`. The empty tree of the
 * object ID's hash algorithm is always reported as present, even if it has
 * never been written. Returns NULL when the object is not known.
 */
static const struct inmemory_object *find_cached_object(struct odb_source_inmemory *source,
							const struct object_id *oid)
{
	static const struct inmemory_object empty_tree = {
		.type = OBJ_TREE,
		.buf = "",
	};
	const struct inmemory_object *stored = NULL;

	/* The tree is only allocated on first write, so it may be missing. */
	if (source->objects)
		stored = oidtree_get(source->objects, oid);
	if (stored)
		return stored;

	/* Without a hash algorithm there is no empty tree to compare with. */
	if (!oid->algo)
		return NULL;
	return oideq(oid, hash_algos[oid->algo].empty_tree) ? &empty_tree : NULL;
}
/*
 * Fill in every field the caller requested in `oi` from `object`. A NULL `oi`
 * is accepted and results in a no-op. Requested contents are handed out as a
 * freshly allocated copy. On-disk size and mtime are reported as zero and the
 * delta base is cleared.
 */
static void populate_object_info(struct odb_source_inmemory *source,
				 struct object_info *oi,
				 const struct inmemory_object *object)
{
	if (oi) {
		if (oi->typep)
			*oi->typep = object->type;
		if (oi->sizep)
			*oi->sizep = object->size;
		if (oi->disk_sizep)
			*oi->disk_sizep = 0;
		if (oi->delta_base_oid)
			oidclr(oi->delta_base_oid,
			       source->base.odb->repo->hash_algo);
		if (oi->contentp)
			*oi->contentp = xmemdupz(object->buf, object->size);
		if (oi->mtimep)
			*oi->mtimep = 0;
		oi->whence = OI_CACHED;
	}
}
/*
 * `read_object_info()` callback: returns 0 and populates `oi` when the object
 * is known to this source, -1 otherwise.
 */
static int odb_source_inmemory_read_object_info(struct odb_source *source,
						const struct object_id *oid,
						struct object_info *oi,
						enum object_info_flags flags UNUSED)
{
	struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source);
	const struct inmemory_object *object = find_cached_object(inmemory, oid);

	if (object) {
		populate_object_info(inmemory, oi, object);
		return 0;
	}

	return -1;
}
/* Read stream over the contents of an object held by the in-memory source. */
struct odb_read_stream_inmemory {
/* Generic stream state; the read callback recovers this struct from it. */
struct odb_read_stream base;
/* Points at the stored object's contents; the stream does not copy them. */
const unsigned char *buf;
/* Number of bytes that have already been handed out to the reader. */
size_t offset;
};
/*
 * Copy up to `buf_len` bytes of the object into `buf`, continuing where the
 * previous read stopped. Returns the number of bytes copied, which is zero
 * once all contents have been consumed.
 */
static ssize_t odb_read_stream_inmemory_read(struct odb_read_stream *stream,
					     char *buf, size_t buf_len)
{
	struct odb_read_stream_inmemory *inmemory =
		container_of(stream, struct odb_read_stream_inmemory, base);
	size_t remaining = inmemory->base.size - inmemory->offset;
	size_t bytes = buf_len > remaining ? remaining : buf_len;

	memcpy(buf, inmemory->buf + inmemory->offset, bytes);
	inmemory->offset += bytes;

	return bytes;
}
/* Close callback of the in-memory read stream: does nothing and returns 0. */
static int odb_read_stream_inmemory_close(struct odb_read_stream *stream UNUSED)
{
return 0;
}
/*
 * `read_object_stream()` callback: on success stores a newly allocated stream
 * for `oid` in `out` and returns 0. Returns -1 when the object is not known
 * to this source, in which case `out` is left untouched.
 */
static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out,
						  struct odb_source *source,
						  const struct object_id *oid)
{
	const struct inmemory_object *object =
		find_cached_object(odb_source_inmemory_downcast(source), oid);
	struct odb_read_stream_inmemory *stream;

	if (!object)
		return -1;

	CALLOC_ARRAY(stream, 1);
	/* The stream reads straight from the stored buffer, without a copy. */
	stream->buf = object->buf;
	stream->base.type = object->type;
	stream->base.size = object->size;
	stream->base.close = odb_read_stream_inmemory_close;
	stream->base.read = odb_read_stream_inmemory_read;

	*out = &stream->base;
	return 0;
}
/* State passed through oidtree_each() while iterating over stored objects. */
struct odb_source_inmemory_for_each_object_data {
/* Source being iterated; needed to populate object info. */
struct odb_source_inmemory *inmemory;
/* Template of the info requested for each object, or NULL for none. */
const struct object_info *request;
/* Caller-provided callback invoked for every object. */
odb_for_each_object_cb cb;
/* Opaque pointer handed through to `cb`. */
void *cb_data;
};
/*
 * Adapter between oidtree_each() and the caller's callback. When object info
 * was requested, a per-object copy of the request is populated and passed
 * along; otherwise the callback receives NULL. The callback's return value is
 * passed back unchanged.
 */
static int odb_source_inmemory_for_each_object_cb(const struct object_id *oid,
						  void *node_data, void *cb_data)
{
	struct odb_source_inmemory_for_each_object_data *data = cb_data;
	struct inmemory_object *object = node_data;
	struct object_info oi;

	if (!data->request)
		return data->cb(oid, NULL, data->cb_data);

	oi = *data->request;
	populate_object_info(data->inmemory, &oi, object);
	return data->cb(oid, &oi, data->cb_data);
}
/*
 * `for_each_object()` callback: invoke `cb` for every stored object matching
 * the optional prefix in `opts`. Nothing is yielded when only promisor objects
 * are requested, or when only local objects are requested and this source is
 * not local. Returns whatever oidtree_each() returns, or 0 when there is
 * nothing to iterate.
 */
static int odb_source_inmemory_for_each_object(struct odb_source *source,
					       const struct object_info *request,
					       odb_for_each_object_cb cb,
					       void *cb_data,
					       const struct odb_for_each_object_options *opts)
{
	struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source);
	struct odb_source_inmemory_for_each_object_data payload = {
		.inmemory = inmemory,
		.request = request,
		.cb = cb,
		.cb_data = cb_data,
	};
	struct object_id null_oid = { 0 };
	const struct object_id *prefix = opts->prefix;

	if (opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)
		return 0;
	if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
		return 0;

	/* Nothing has been written yet, so there is no tree to walk. */
	if (!inmemory->objects)
		return 0;

	if (!prefix)
		prefix = &null_oid;

	return oidtree_each(inmemory->objects, prefix, opts->prefix_hex_len,
			    odb_source_inmemory_for_each_object_cb, &payload);
}
/* State shared with find_abbrev_len_cb() while computing an abbreviation. */
struct find_abbrev_len_data {
/* Object ID that is being abbreviated. */
const struct object_id *oid;
/* Abbreviation length found so far; only ever grows during iteration. */
unsigned len;
};
/*
 * Per-object callback: grow the abbreviation length so that it extends one
 * hex digit past the prefix shared with `oid`. An object sharing all hex
 * digits is the object being abbreviated and is ignored.
 */
static int find_abbrev_len_cb(const struct object_id *oid,
			      struct object_info *oi UNUSED,
			      void *cb_data)
{
	struct find_abbrev_len_data *data = cb_data;
	unsigned shared = oid_common_prefix_hexlen(oid, data->oid);

	if (shared == hash_algos[oid->algo].hexsz)
		return 0;
	if (shared >= data->len)
		data->len = shared + 1;
	return 0;
}
/*
 * `find_abbrev_len()` callback: walk all stored objects sharing the first
 * `min_len` hex digits with `oid` and store in `out` the length needed to
 * tell `oid` apart from them (at least `min_len`). Returns the result of the
 * iteration.
 */
static int odb_source_inmemory_find_abbrev_len(struct odb_source *source,
					       const struct object_id *oid,
					       unsigned min_len,
					       unsigned *out)
{
	struct find_abbrev_len_data data = {
		.oid = oid,
		.len = min_len,
	};
	struct odb_for_each_object_options opts = {
		.prefix = oid,
		.prefix_hex_len = min_len,
	};
	int ret = odb_source_inmemory_for_each_object(source, NULL,
						      find_abbrev_len_cb,
						      &data, &opts);

	*out = data.len;
	return ret;
}
/* Per-object callback: bump the `unsigned long` counter behind `cb_data`. */
static int count_objects_cb(const struct object_id *oid UNUSED,
			    struct object_info *oi UNUSED,
			    void *cb_data)
{
	unsigned long *total = cb_data;

	*total += 1;
	return 0;
}
/*
 * `count_objects()` callback: store the number of objects held by this source
 * in `out`. The count is obtained by iterating over all stored objects.
 */
static int odb_source_inmemory_count_objects(struct odb_source *source,
					     enum odb_count_objects_flags flags UNUSED,
					     unsigned long *out)
{
	struct odb_for_each_object_options opts = { 0 };
	int ret;

	*out = 0;
	ret = odb_source_inmemory_for_each_object(source, NULL,
						  count_objects_cb,
						  out, &opts);
	return ret;
}
/*
 * `write_object()` callback: hash the given contents, store the resulting
 * object ID in `oid` and remember the object in memory. The object ID is
 * computed even when the object is already stored, in which case nothing else
 * happens. The tree holding the objects is allocated on first use. Always
 * returns 0.
 */
static int odb_source_inmemory_write_object(struct odb_source *source,
					    const void *buf, unsigned long len,
					    enum object_type type,
					    struct object_id *oid,
					    struct object_id *compat_oid UNUSED,
					    enum odb_write_object_flags flags UNUSED)
{
	struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source);
	struct inmemory_object *object;

	hash_object_file(source->odb->repo->hash_algo, buf, len, type, oid);

	if (inmemory->objects) {
		if (oidtree_contains(inmemory->objects, oid))
			return 0;
	} else {
		CALLOC_ARRAY(inmemory->objects, 1);
		oidtree_init(inmemory->objects);
	}

	CALLOC_ARRAY(object, 1);
	object->type = type;
	object->size = len;
	object->buf = xmemdupz(buf, len);

	oidtree_insert(inmemory->objects, oid, object);
	return 0;
}
static int odb_source_inmemory_write_object_stream(struct odb_source *source,
struct odb_write_stream *stream,
size_t len,
struct object_id *oid)
{
char buf[16384];
size_t total_read = 0;
char *data;
int ret;
CALLOC_ARRAY(data, len);
while (!stream->is_finished) {
ssize_t bytes_read;
bytes_read = odb_write_stream_read(stream, buf, sizeof(buf));
if (total_read + bytes_read > len) {
ret = error("object stream yielded more bytes than expected");
goto out;
}
memcpy(data + total_read, buf, bytes_read);
total_read += bytes_read;
}
if (total_read != len) {
ret = error("object stream yielded less bytes than expected");
goto out;
}
ret = odb_source_inmemory_write_object(source, data, len, OBJ_BLOB, oid,
NULL, 0);
if (ret < 0)
goto out;
out:
free(data);
return ret;
}
/*
 * `freshen_object()` callback: returns 1 when the object is known to this
 * source and 0 otherwise. No state is modified.
 */
static int odb_source_inmemory_freshen_object(struct odb_source *source,
					      const struct object_id *oid)
{
	struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source);

	return find_cached_object(inmemory, oid) ? 1 : 0;
}
/*
 * `begin_transaction()` callback: transactions are not implemented for the
 * in-memory source, so this only reports an error and returns error()'s
 * result. `out` is never written.
 */
static int odb_source_inmemory_begin_transaction(struct odb_source *source UNUSED,
struct odb_transaction **out UNUSED)
{
return error("in-memory source does not support transactions");
}
/*
 * `read_alternates()` callback: adds nothing to `out` and returns 0, so the
 * in-memory source never reports any alternates.
 */
static int odb_source_inmemory_read_alternates(struct odb_source *source UNUSED,
struct strvec *out UNUSED)
{
return 0;
}
/*
 * `write_alternate()` callback: alternates cannot be recorded for the
 * in-memory source, so this only reports an error and returns error()'s
 * result.
 */
static int odb_source_inmemory_write_alternate(struct odb_source *source UNUSED,
const char *alternate UNUSED)
{
return error("in-memory source does not support alternates");
}
/* `close()` callback: intentionally empty, stored objects are kept as is. */
static void odb_source_inmemory_close(struct odb_source *source UNUSED)
{
}
/* `reprepare()` callback: intentionally empty, there is nothing to reload. */
static void odb_source_inmemory_reprepare(struct odb_source *source UNUSED)
{
}
/*
 * oidtree_each() callback used on teardown: release the contents of a stored
 * object as well as the object record itself. Always returns 0 so that the
 * iteration continues.
 */
static int inmemory_object_free(const struct object_id *oid UNUSED,
				void *node_data,
				void *cb_data UNUSED)
{
	struct inmemory_object *object = node_data;
	void *contents = (void *) object->buf;

	free(contents);
	free(object);
	return 0;
}
/*
 * `free()` callback: release all stored objects, the tree holding them (if it
 * was ever allocated), the source's path and finally the source itself.
 */
static void odb_source_inmemory_free(struct odb_source *source)
{
	struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source);
	struct oidtree *objects = inmemory->objects;
	struct object_id null_oid = { 0 };

	if (objects) {
		/* A zero-length prefix visits every entry in the tree. */
		oidtree_each(objects, &null_oid, 0, inmemory_object_free, NULL);
		oidtree_clear(objects);
		free(objects);
	}

	free(inmemory->base.path);
	free(inmemory);
}
struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb)
{
struct odb_source_inmemory *source;
CALLOC_ARRAY(source, 1);
odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false);
source->base.free = odb_source_inmemory_free;
source->base.close = odb_source_inmemory_close;
source->base.reprepare = odb_source_inmemory_reprepare;
source->base.read_object_info = odb_source_inmemory_read_object_info;
source->base.read_object_stream = odb_source_inmemory_read_object_stream;
source->base.for_each_object = odb_source_inmemory_for_each_object;
source->base.find_abbrev_len = odb_source_inmemory_find_abbrev_len;
source->base.count_objects = odb_source_inmemory_count_objects;
source->base.write_object = odb_source_inmemory_write_object;
source->base.write_object_stream = odb_source_inmemory_write_object_stream;
source->base.freshen_object = odb_source_inmemory_freshen_object;
source->base.begin_transaction = odb_source_inmemory_begin_transaction;
source->base.read_alternates = odb_source_inmemory_read_alternates;
source->base.write_alternate = odb_source_inmemory_write_alternate;
return source;
}

33
odb/source-inmemory.h Normal file
View File

@@ -0,0 +1,33 @@
#ifndef ODB_SOURCE_INMEMORY_H
#define ODB_SOURCE_INMEMORY_H
#include "odb/source.h"
struct oidtree;
/*
* An in-memory source that you can write objects to that shall be made
* available for reading, but that shouldn't ever be persisted to disk. Note
* that any objects written to this source will be stored in memory, so the
* number of objects you can store is limited by available system memory.
*/
struct odb_source_inmemory {
/* Generic source state; used to convert to and from `struct odb_source`. */
struct odb_source base;
/* Stored objects keyed by object ID; NULL until the first object is written. */
struct oidtree *objects;
};
/* Create a new in-memory object database source. */
struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb);
/*
* Cast the given object database source to the in-memory backend. This will
* cause a BUG in case the source doesn't use this backend.
*/
static inline struct odb_source_inmemory *odb_source_inmemory_downcast(struct odb_source *source)
{
	const int is_inmemory = (source->type == ODB_SOURCE_INMEMORY);

	/* Refuse to reinterpret sources that belong to another backend. */
	if (!is_inmemory)
		BUG("trying to downcast source of type '%d' to in-memory", source->type);

	return container_of(source, struct odb_source_inmemory, base);
}
#endif

View File

@@ -13,6 +13,9 @@ enum odb_source_type {
/* The "files" backend that uses loose objects and packfiles. */
ODB_SOURCE_FILES,
/* The "in-memory" backend that stores objects in memory. */
ODB_SOURCE_INMEMORY,
};
struct object_id;

View File

@@ -6,9 +6,15 @@
#include "oidtree.h"
#include "hash.h"
/*
 * Node as stored in the oidtree: the crit-bit tree node, the object ID that
 * serves as its key, and the payload pointer supplied on insertion.
 */
struct oidtree_node {
/* Embedded tree node; converted back to this struct via container_of(). */
struct cb_node base;
/* Key of the node; its offset is handed to cb_init(). */
struct object_id key;
/* Caller-provided payload, may be NULL. */
void *data;
};
void oidtree_init(struct oidtree *ot)
{
cb_init(&ot->tree);
cb_init(&ot->tree, offsetof(struct oidtree_node, key));
mem_pool_init(&ot->mem_pool, 0);
}
@@ -20,22 +26,22 @@ void oidtree_clear(struct oidtree *ot)
}
}
void oidtree_insert(struct oidtree *ot, const struct object_id *oid)
/* NOTE(review): no user of this struct is visible here — confirm it is needed. */
struct oidtree_data {
struct object_id oid;
};
void oidtree_insert(struct oidtree *ot, const struct object_id *oid,
void *data)
{
struct cb_node *on;
struct object_id k;
struct oidtree_node *on;
struct cb_node *node;
if (!oid->algo)
BUG("oidtree_insert requires oid->algo");
on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid));
/*
* Clear the padding and copy the result in separate steps to
* respect the 4-byte alignment needed by struct object_id.
*/
oidcpy(&k, oid);
memcpy(on->k, &k, sizeof(k));
on = mem_pool_alloc(&ot->mem_pool, sizeof(*on));
oidcpy(&on->key, oid);
on->data = data;
/*
* n.b. Current callers won't get us duplicates, here. If a
@@ -43,13 +49,19 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid)
* that won't be freed until oidtree_clear. Currently it's not
* worth maintaining a free list
*/
cb_insert(&ot->tree, on, sizeof(*oid));
node = cb_insert(&ot->tree, &on->base, sizeof(*oid));
if (node) {
struct oidtree_node *preexisting = container_of(node, struct oidtree_node, base);
preexisting->data = data;
}
}
bool oidtree_contains(struct oidtree *ot, const struct object_id *oid)
static struct oidtree_node *oidtree_lookup(struct oidtree *ot,
const struct object_id *oid)
{
struct object_id k;
size_t klen = sizeof(k);
struct cb_node *node;
oidcpy(&k, oid);
@@ -60,7 +72,20 @@ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid)
klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) <
offsetof(struct object_id, algo));
return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen);
node = cb_lookup(&ot->tree, (const uint8_t *)&k, klen);
return node ? container_of(node, struct oidtree_node, base) : NULL;
}
bool oidtree_contains(struct oidtree *ot, const struct object_id *oid)
{
	/* Only presence matters here; the payload is not inspected. */
	return !!oidtree_lookup(ot, oid);
}
/*
 * Return the payload stored alongside `oid`. NULL is returned both when the
 * object ID is not part of the tree and when it was inserted with a NULL
 * payload.
 */
void *oidtree_get(struct oidtree *ot, const struct object_id *oid)
{
	struct oidtree_node *found = oidtree_lookup(ot, oid);

	if (!found)
		return NULL;
	return found->data;
}
struct oidtree_each_data {
@@ -73,21 +98,18 @@ struct oidtree_each_data {
static int iter(struct cb_node *n, void *cb_data)
{
struct oidtree_node *node = container_of(n, struct oidtree_node, base);
struct oidtree_each_data *data = cb_data;
struct object_id k;
/* Copy to provide 4-byte alignment needed by struct object_id. */
memcpy(&k, n->k, sizeof(k));
if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo)
if (data->algo != GIT_HASH_UNKNOWN && data->algo != node->key.algo)
return 0;
if (data->last_nibble_at) {
if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0)
if ((node->key.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0)
return 0;
}
return data->cb(&k, data->cb_data);
return data->cb(&node->key, node->data, data->cb_data);
}
int oidtree_each(struct oidtree *ot, const struct object_id *prefix,

View File

@@ -29,18 +29,26 @@ void oidtree_init(struct oidtree *ot);
*/
void oidtree_clear(struct oidtree *ot);
/* Insert the object ID into the tree. */
void oidtree_insert(struct oidtree *ot, const struct object_id *oid);
/*
* Insert the object ID into the tree and store the given pointer alongside
* with it. The data pointer of any preexisting entry will be overwritten.
*/
void oidtree_insert(struct oidtree *ot, const struct object_id *oid,
void *data);
/* Check whether the tree contains the given object ID. */
bool oidtree_contains(struct oidtree *ot, const struct object_id *oid);
/* Get the payload stored with the given object ID. */
void *oidtree_get(struct oidtree *ot, const struct object_id *oid);
/*
* Callback function used for `oidtree_each()`. Returning a non-zero exit code
* will cause iteration to stop. The exit code will be propagated to the caller
* of `oidtree_each()`.
*/
typedef int (*oidtree_each_cb)(const struct object_id *oid,
void *node_data,
void *cb_data);
/*

View File

@@ -6,6 +6,7 @@ clar_test_suites = [
'unit-tests/u-hashmap.c',
'unit-tests/u-list-objects-filter-options.c',
'unit-tests/u-mem-pool.c',
'unit-tests/u-odb-inmemory.c',
'unit-tests/u-oid-array.c',
'unit-tests/u-oidmap.c',
'unit-tests/u-oidtree.c',

View File

@@ -0,0 +1,313 @@
#include "unit-test.h"
#include "hex.h"
#include "odb/source-inmemory.h"
#include "odb/streaming.h"
#include "oidset.h"
#include "repository.h"
#include "strbuf.h"
#define RANDOM_OID "da39a3ee5e6b4b0d3255bfef95601890afd80709"
#define FOOBAR_OID "f6ea0495187600e7b2288c8ac19c5886383a4632"
/*
 * Minimal repository used by the tests in this file; only the hash algorithm
 * is set, and it is fixed to SHA-1.
 */
static struct repository repo = {
.hash_algo = &hash_algos[GIT_HASH_SHA1],
};
/*
 * Object database the sources under test are attached to. Created by
 * test_odb_inmemory__initialize() and freed by test_odb_inmemory__cleanup().
 */
static struct object_database *odb;
/*
 * Read type, size and contents of `oid` from `source` and verify that they
 * match the expectation. `expected_content` must be a NUL-terminated string,
 * as the contents are compared as strings.
 */
static void cl_assert_object_info(struct odb_source_inmemory *source,
				  const struct object_id *oid,
				  enum object_type expected_type,
				  const char *expected_content)
{
	struct odb_source *base = &source->base;
	void *actual_content;
	unsigned long actual_size;
	enum object_type actual_type;
	struct object_info oi = {
		.typep = &actual_type,
		.sizep = &actual_size,
		.contentp = &actual_content,
	};

	cl_must_pass(odb_source_read_object_info(base, oid, &oi, 0));

	cl_assert_equal_u(actual_size, strlen(expected_content));
	cl_assert_equal_u(actual_type, expected_type);
	cl_assert_equal_s((char *) actual_content, expected_content);

	/* The contents are handed out as an allocation owned by us. */
	free(actual_content);
}
/* Create the object database for `repo`, passing empty strings for both source arguments. */
void test_odb_inmemory__initialize(void)
{
odb = odb_new(&repo, "", "");
}
/* Release the object database created by test_odb_inmemory__initialize(). */
void test_odb_inmemory__cleanup(void)
{
odb_free(odb);
}
void test_odb_inmemory__new(void)
{
struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
cl_assert_equal_i(source->base.type, ODB_SOURCE_INMEMORY);
odb_source_free(&source->base);
}
/* Reading an object that was never written must fail. */
void test_odb_inmemory__read_missing_object(void)
{
	struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
	struct odb_source *base = &source->base;
	const char *end;
	struct object_id oid;

	cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo));
	cl_must_fail(odb_source_read_object_info(base, &oid, NULL, 0));

	odb_source_free(base);
}
/* The empty tree is readable even though nothing has been written. */
void test_odb_inmemory__read_empty_tree(void)
{
	struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
	const struct object_id *empty_tree = repo.hash_algo->empty_tree;

	cl_assert_object_info(source, empty_tree, OBJ_TREE, "");

	odb_source_free(&source->base);
}
/* An object written to the source can be read back with the expected ID. */
void test_odb_inmemory__read_written_object(void)
{
	struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
	struct odb_source *base = &source->base;
	struct object_id written_oid;
	const char data[] = "foobar";
	size_t data_len = strlen(data);

	cl_must_pass(odb_source_write_object(base, data, data_len,
					     OBJ_BLOB, &written_oid, NULL, 0));
	cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID);
	cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar");

	odb_source_free(base);
}
/*
 * Stream a six-byte blob out of the source in chunks of two bytes and verify
 * each chunk as well as the end-of-stream condition.
 */
void test_odb_inmemory__read_stream_object(void)
{
	struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
	struct odb_source *base = &source->base;
	const char data[] = "foobar";
	/* Two payload bytes plus a terminator that is never overwritten. */
	char buf[3] = { 0 };
	struct object_id written_oid;
	struct odb_read_stream *stream;

	cl_must_pass(odb_source_write_object(base, data, strlen(data),
					     OBJ_BLOB, &written_oid, NULL, 0));
	cl_must_pass(odb_source_read_object_stream(&stream, base,
						   &written_oid));

	cl_assert_equal_i(stream->type, OBJ_BLOB);
	cl_assert_equal_u(stream->size, 6);

	cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2);
	cl_assert_equal_s(buf, "fo");
	cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2);
	cl_assert_equal_s(buf, "ob");
	cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2);
	cl_assert_equal_s(buf, "ar");

	/* All bytes consumed: further reads yield nothing. */
	cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 0);

	odb_read_stream_close(stream);
	odb_source_free(base);
}
/* Iteration callback: record every yielded object ID in the given oidset. */
static int add_one_object(const struct object_id *oid,
			  struct object_info *oi UNUSED,
			  void *payload)
{
	struct oidset *seen = payload;

	cl_must_pass(oidset_insert(seen, oid));
	return 0;
}
/*
 * Iterating an empty source yields nothing; after writing ten blobs the
 * iteration yields exactly the object IDs that were written.
 */
void test_odb_inmemory__for_each_object(void)
{
	struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
	struct odb_source *base = &source->base;
	struct odb_for_each_object_options opts = { 0 };
	struct oidset actual_oids = OIDSET_INIT;
	struct oidset expected_oids = OIDSET_INIT;
	struct strbuf buf = STRBUF_INIT;
	int i;

	cl_must_pass(odb_source_for_each_object(base, NULL, add_one_object,
						&actual_oids, &opts));
	cl_assert_equal_u(oidset_size(&actual_oids), 0);

	/* Blob contents are the decimal numbers "0" through "9". */
	for (i = 0; i < 10; i++) {
		struct object_id written_oid;

		strbuf_reset(&buf);
		strbuf_addf(&buf, "%d", i);

		cl_must_pass(odb_source_write_object(base, buf.buf, buf.len,
						     OBJ_BLOB, &written_oid, NULL, 0));
		cl_must_pass(oidset_insert(&expected_oids, &written_oid));
	}

	cl_must_pass(odb_source_for_each_object(base, NULL, add_one_object,
						&actual_oids, &opts));
	cl_assert_equal_b(oidset_equal(&expected_oids, &actual_oids), true);

	odb_source_free(base);
	oidset_clear(&expected_oids);
	oidset_clear(&actual_oids);
	strbuf_release(&buf);
}
/*
 * Iteration callback: count invocations in the `unsigned` behind `payload`
 * and ask for the iteration to stop with code 123 on the second one.
 */
static int abort_after_two_objects(const struct object_id *oid UNUSED,
				   struct object_info *oi UNUSED,
				   void *payload)
{
	unsigned *calls = payload;

	*calls += 1;
	return *calls == 2 ? 123 : 0;
}
/*
 * A non-zero return value from the callback stops the iteration early and is
 * propagated to the caller.
 */
void test_odb_inmemory__for_each_object_can_abort_iteration(void)
{
	static const char *const contents[] = { "1", "2", "3" };
	struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
	struct odb_source *base = &source->base;
	struct odb_for_each_object_options opts = { 0 };
	struct object_id written_oid;
	unsigned counter = 0;
	size_t i;

	for (i = 0; i < sizeof(contents) / sizeof(contents[0]); i++)
		cl_must_pass(odb_source_write_object(base, contents[i], 1,
						     OBJ_BLOB, &written_oid, NULL, 0));

	cl_assert_equal_i(odb_source_for_each_object(base, NULL,
						     abort_after_two_objects,
						     &counter, &opts),
			  123);
	/* Three objects exist, but only two have been visited. */
	cl_assert_equal_u(counter, 2);

	odb_source_free(base);
}
/* The object count is zero initially and tracks the objects written. */
void test_odb_inmemory__count_objects(void)
{
	static const char *const contents[] = { "1", "2", "3" };
	struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
	struct odb_source *base = &source->base;
	struct object_id written_oid;
	unsigned long count;
	size_t i;

	cl_must_pass(odb_source_count_objects(base, 0, &count));
	cl_assert_equal_u(count, 0);

	for (i = 0; i < sizeof(contents) / sizeof(contents[0]); i++)
		cl_must_pass(odb_source_write_object(base, contents[i], 1,
						     OBJ_BLOB, &written_oid, NULL, 0));

	cl_must_pass(odb_source_count_objects(base, 0, &count));
	cl_assert_equal_u(count, 3);

	odb_source_free(base);
}
void test_odb_inmemory__find_abbrev_len(void)
{
struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
struct object_id oid1, oid2;
unsigned abbrev_len;
/*
* The two blobs we're about to write share the first 10 hex characters
* of their object IDs ("a09f43dc45"), so at least 11 characters are
* needed to tell them apart:
*
* "368317" -> a09f43dc4562d45115583f5094640ae237df55f7
* "514796" -> a09f43dc45fef837235eb7e6b1a6ca5e169a3981
*
* With only one blob written we expect a length of 4.
*/
cl_must_pass(odb_source_write_object(&source->base, "368317", strlen("368317"),
OBJ_BLOB, &oid1, NULL, 0));
cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4,
&abbrev_len));
cl_assert_equal_u(abbrev_len, 4);
/*
* With both objects present, the shared 10-character prefix means we
* need at least 11 characters to uniquely identify either object.
*/
cl_must_pass(odb_source_write_object(&source->base, "514796", strlen("514796"),
OBJ_BLOB, &oid2, NULL, 0));
cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4,
&abbrev_len));
cl_assert_equal_u(abbrev_len, 11);
odb_source_free(&source->base);
}
void test_odb_inmemory__freshen_object(void)
{
struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
struct object_id written_oid;
struct object_id oid;
const char *end;
cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo));
cl_assert_equal_i(odb_source_freshen_object(&source->base, &oid), 0);
cl_must_pass(odb_source_write_object(&source->base, "foobar",
strlen("foobar"), OBJ_BLOB,
&written_oid, NULL, 0));
cl_assert_equal_i(odb_source_freshen_object(&source->base,
&written_oid), 1);
odb_source_free(&source->base);
}
/*
 * Write stream that serves its data from a caller-provided memory buffer.
 * The read callback recovers this wrapper from the embedded `base` member
 * via container_of().
 */
struct membuf_write_stream {
	/* Generic stream handed to the object source. */
	struct odb_write_stream base;
	/* Bytes to serve; not owned by the stream. */
	const char *buf;
	/* Number of bytes of `buf` that have already been handed out. */
	size_t offset;
	/* Total number of bytes available in `buf`. */
	size_t size;
};
/*
 * Read callback for `struct membuf_write_stream`. Copies the next bytes of
 * the backing buffer into `buf` and returns how many bytes were copied.
 *
 * At most two bytes are handed out per call, further limited by `len` and by
 * the number of bytes left in the backing buffer. The stream is flagged as
 * finished as soon as the whole backing buffer has been consumed.
 *
 * NOTE(review): the two-byte cap presumably exists to force callers through
 * several read calls -- confirm.
 */
static ssize_t membuf_write_stream_read(struct odb_write_stream *stream,
					unsigned char *buf, size_t len)
{
	struct membuf_write_stream *s =
		container_of(stream, struct membuf_write_stream, base);
	size_t remaining = s->size - s->offset;
	size_t n = 2;

	if (len < n)
		n = len;
	if (remaining < n)
		n = remaining;

	memcpy(buf, s->buf + s->offset, n);
	s->offset += n;

	/* Nothing left once this chunk covered all remaining bytes. */
	if (n == remaining)
		s->base.is_finished = 1;

	return n;
}
void test_odb_inmemory__write_object_stream(void)
{
struct odb_source_inmemory *source = odb_source_inmemory_new(odb);
const char data[] = "foobar";
struct membuf_write_stream stream = {
.base.read = membuf_write_stream_read,
.buf = data,
.size = strlen(data),
};
struct object_id written_oid;
cl_must_pass(odb_source_write_object_stream(&source->base, &stream.base,
strlen(data), &written_oid));
cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID);
cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar");
odb_source_free(&source->base);
}

View File

@@ -19,7 +19,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n)
for (size_t i = 0; i < n; i++) {
struct object_id oid;
cl_parse_any_oid(hexes[i], &oid);
oidtree_insert(ot, &oid);
oidtree_insert(ot, &oid, NULL);
}
return 0;
}
@@ -38,9 +38,9 @@ struct expected_hex_iter {
const char *query;
};
static int check_each_cb(const struct object_id *oid, void *data)
static int check_each_cb(const struct object_id *oid, void *node_data UNUSED, void *cb_data)
{
struct expected_hex_iter *hex_iter = data;
struct expected_hex_iter *hex_iter = cb_data;
struct object_id expected;
cl_assert(hex_iter->i < hex_iter->expected_hexes.nr);
@@ -105,3 +105,23 @@ void test_oidtree__each(void)
check_each(&ot, "32100", "321", NULL);
check_each(&ot, "32", "320", "321", NULL);
}
void test_oidtree__insert_overwrites_data(void)
{
struct object_id oid;
struct oidtree ot;
int a, b;
cl_parse_any_oid("1", &oid);
oidtree_init(&ot);
oidtree_insert(&ot, &oid, NULL);
cl_assert_equal_p(oidtree_get(&ot, &oid), NULL);
oidtree_insert(&ot, &oid, &a);
cl_assert_equal_p(oidtree_get(&ot, &oid), &a);
oidtree_insert(&ot, &oid, &b);
cl_assert_equal_p(oidtree_get(&ot, &oid), &b);
oidtree_clear(&ot);
}