diff --git a/Makefile b/Makefile index b88c40ac9c..a43b8ee067 100644 --- a/Makefile +++ b/Makefile @@ -1216,6 +1216,7 @@ LIB_OBJS += object.o LIB_OBJS += odb.o LIB_OBJS += odb/source.o LIB_OBJS += odb/source-files.o +LIB_OBJS += odb/source-inmemory.o LIB_OBJS += odb/streaming.o LIB_OBJS += odb/transaction.o LIB_OBJS += oid-array.o @@ -1526,6 +1527,7 @@ CLAR_TEST_SUITES += u-hash CLAR_TEST_SUITES += u-hashmap CLAR_TEST_SUITES += u-list-objects-filter-options CLAR_TEST_SUITES += u-mem-pool +CLAR_TEST_SUITES += u-odb-inmemory CLAR_TEST_SUITES += u-oid-array CLAR_TEST_SUITES += u-oidmap CLAR_TEST_SUITES += u-oidtree diff --git a/cbtree.c b/cbtree.c index 4ab794bddc..8f5edbb80a 100644 --- a/cbtree.c +++ b/cbtree.c @@ -7,6 +7,11 @@ #include "git-compat-util.h" #include "cbtree.h" +static inline uint8_t *cb_node_key(struct cb_tree *t, struct cb_node *node) +{ + return (uint8_t *) node + t->key_offset; +} + static struct cb_node *cb_node_of(const void *p) { return (struct cb_node *)((uintptr_t)p - 1); @@ -33,6 +38,7 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) uint8_t c; int newdirection; struct cb_node **wherep, *p; + uint8_t *node_key, *p_key; assert(!((uintptr_t)node & 1)); /* allocations must be aligned */ @@ -41,23 +47,26 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) return NULL; /* success */ } + node_key = cb_node_key(t, node); + /* see if a node already exists */ - p = cb_internal_best_match(t->root, node->k, klen); + p = cb_internal_best_match(t->root, node_key, klen); + p_key = cb_node_key(t, p); /* find first differing byte */ for (newbyte = 0; newbyte < klen; newbyte++) { - if (p->k[newbyte] != node->k[newbyte]) + if (p_key[newbyte] != node_key[newbyte]) goto different_byte_found; } return p; /* element exists, let user deal with it */ different_byte_found: - newotherbits = p->k[newbyte] ^ node->k[newbyte]; + newotherbits = p_key[newbyte] ^ node_key[newbyte]; newotherbits |= newotherbits 
>> 1; newotherbits |= newotherbits >> 2; newotherbits |= newotherbits >> 4; newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; - c = p->k[newbyte]; + c = p_key[newbyte]; newdirection = (1 + (newotherbits | c)) >> 8; node->byte = newbyte; @@ -78,7 +87,7 @@ different_byte_found: break; if (q->byte == newbyte && q->otherbits > newotherbits) break; - c = q->byte < klen ? node->k[q->byte] : 0; + c = q->byte < klen ? node_key[q->byte] : 0; direction = (1 + (q->otherbits | c)) >> 8; wherep = q->child + direction; } @@ -93,7 +102,7 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) { struct cb_node *p = cb_internal_best_match(t->root, k, klen); - return p && !memcmp(p->k, k, klen) ? p : NULL; + return p && !memcmp(cb_node_key(t, p), k, klen) ? p : NULL; } static int cb_descend(struct cb_node *p, cb_iter fn, void *arg) @@ -115,6 +124,7 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, struct cb_node *p = t->root; struct cb_node *top = p; size_t i = 0; + uint8_t *p_key; if (!p) return 0; /* empty tree */ @@ -130,8 +140,9 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, top = p; } + p_key = cb_node_key(t, p); for (i = 0; i < klen; i++) { - if (p->k[i] != kpfx[i]) + if (p_key[i] != kpfx[i]) return 0; /* "best" match failed */ } diff --git a/cbtree.h b/cbtree.h index c374b1b3db..4647d4a32f 100644 --- a/cbtree.h +++ b/cbtree.h @@ -6,9 +6,9 @@ * * This is adapted to store arbitrary data (not just NUL-terminated C strings * and allocates no memory internally. The user needs to allocate - * "struct cb_node" and fill cb_node.k[] with arbitrary match data - * for memcmp. - * If "klen" is variable, then it should be embedded into "c_node.k[]" + * "struct cb_node" and provide `key_offset` to indicate where the key can be + * found relative to the `struct cb_node` for memcmp. + * If "klen" is variable, then it should be embedded into the key. * Recursion is bound by the maximum value of "klen" used. 
*/ #ifndef CBTREE_H @@ -23,18 +23,19 @@ struct cb_node { */ uint32_t byte; uint8_t otherbits; - uint8_t k[FLEX_ARRAY]; /* arbitrary data, unaligned */ }; struct cb_tree { struct cb_node *root; + ptrdiff_t key_offset; }; -#define CBTREE_INIT { 0 } - -static inline void cb_init(struct cb_tree *t) +static inline void cb_init(struct cb_tree *t, + ptrdiff_t key_offset) { - struct cb_tree blank = CBTREE_INIT; + struct cb_tree blank = { + .key_offset = key_offset, + }; memcpy(t, &blank, sizeof(*t)); } diff --git a/loose.c b/loose.c index 07333be696..f7a3dd1a72 100644 --- a/loose.c +++ b/loose.c @@ -57,7 +57,7 @@ static int insert_loose_map(struct odb_source *source, inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(files->loose->cache, compat_oid); + oidtree_insert(files->loose->cache, compat_oid, NULL); return inserted; } diff --git a/meson.build b/meson.build index 6622aedd97..664d831329 100644 --- a/meson.build +++ b/meson.build @@ -404,6 +404,7 @@ libgit_sources = [ 'odb.c', 'odb/source.c', 'odb/source-files.c', + 'odb/source-inmemory.c', 'odb/streaming.c', 'odb/transaction.c', 'oid-array.c', diff --git a/object-file.c b/object-file.c index 747314fe00..90f995d000 100644 --- a/object-file.c +++ b/object-file.c @@ -1858,6 +1858,7 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *node_data UNUSED, void *cb_data) { struct for_each_object_wrapper_data *data = cb_data; @@ -2003,7 +2004,7 @@ static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) { - oidtree_insert(data, oid); + oidtree_insert(data, oid, NULL); return 0; } diff --git a/odb.c b/odb.c index 40a5e9c4e0..965ef68e4e 100644 --- a/odb.c +++ b/odb.c @@ -14,6 +14,7 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/source-inmemory.h" 
#include "packfile.h" #include "path.h" #include "promisor-remote.h" @@ -31,40 +32,6 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct odb_source *, 1, fspathhash, fspatheq) -/* - * This is meant to hold a *small* number of objects that you would - * want odb_read_object() to be able to return, but yet you do not want - * to write them into the object store (e.g. a browse-only - * application). - */ -struct cached_object_entry { - struct object_id oid; - struct cached_object { - enum object_type type; - const void *buf; - unsigned long size; - } value; -}; - -static const struct cached_object *find_cached_object(struct object_database *object_store, - const struct object_id *oid) -{ - static const struct cached_object empty_tree = { - .type = OBJ_TREE, - .buf = "", - }; - const struct cached_object_entry *co = object_store->cached_objects; - - for (size_t i = 0; i < object_store->cached_object_nr; i++, co++) - if (oideq(&co->oid, oid)) - return &co->value; - - if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) - return &empty_tree; - - return NULL; -} - int odb_mkstemp(struct object_database *odb, struct strbuf *temp_filename, const char *pattern) { @@ -584,7 +551,6 @@ static int do_oid_object_info_extended(struct object_database *odb, const struct object_id *oid, struct object_info *oi, unsigned flags) { - const struct cached_object *co; const struct object_id *real = oid; int already_retried = 0; @@ -594,25 +560,8 @@ static int do_oid_object_info_extended(struct object_database *odb, if (is_null_oid(real)) return -1; - co = find_cached_object(odb, real); - if (co) { - if (oi) { - if (oi->typep) - *(oi->typep) = co->type; - if (oi->sizep) - *(oi->sizep) = co->size; - if (oi->disk_sizep) - *(oi->disk_sizep) = 0; - if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, odb->repo->hash_algo); - if (oi->contentp) - *oi->contentp = xmemdupz(co->buf, co->size); - if (oi->mtimep) - *oi->mtimep = 0; - oi->whence = OI_CACHED; - } + if 
(!odb_source_read_object_info(odb->inmemory_objects, real, oi, flags)) /* look up "real", as the replaced find_cached_object(odb, real) call did */ return 0; - } odb_prepare_alternates(odb); @@ -784,24 +733,12 @@ int odb_pretend_object(struct object_database *odb, void *buf, unsigned long len, enum object_type type, struct object_id *oid) { - struct cached_object_entry *co; - char *co_buf; - hash_object_file(odb->repo->hash_algo, buf, len, type, oid); - if (odb_has_object(odb, oid, 0) || - find_cached_object(odb, oid)) + if (odb_has_object(odb, oid, 0)) return 0; - ALLOC_GROW(odb->cached_objects, - odb->cached_object_nr + 1, odb->cached_object_alloc); - co = &odb->cached_objects[odb->cached_object_nr++]; - co->value.size = len; - co->value.type = type; - co_buf = xmalloc(len); - memcpy(co_buf, buf, len); - co->value.buf = co_buf; - oidcpy(&co->oid, oid); - return 0; + return odb_source_write_object(odb->inmemory_objects, + buf, len, type, oid, NULL, 0); } void *odb_read_object(struct object_database *odb, @@ -1083,6 +1020,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); + o->inmemory_objects = &odb_source_inmemory_new(o)->base; free(to_free); @@ -1106,6 +1044,10 @@ static void odb_free_sources(struct object_database *o) odb_source_free(o->sources); o->sources = next; } + + odb_source_free(o->inmemory_objects); + o->inmemory_objects = NULL; + kh_destroy_odb_path_map(o->source_by_path); o->source_by_path = NULL; } @@ -1123,10 +1065,6 @@ void odb_free(struct object_database *o) odb_close(o); odb_free_sources(o); - for (size_t i = 0; i < o->cached_object_nr; i++) - free((char *) o->cached_objects[i].value.buf); - free(o->cached_objects); - string_list_clear(&o->submodule_source_paths, 0); free(o); diff --git a/odb.h b/odb.h index 9eb8355aca..73553ed5a7 100644 --- a/odb.h +++ b/odb.h @@ -8,6 +8,7 @@ #include "thread-utils.h" struct cached_object_entry; +struct odb_source_inmemory; struct 
packed_git; struct repository; struct strbuf; @@ -80,8 +81,7 @@ struct object_database { * to write them into the object store (e.g. a browse-only * application). */ - struct cached_object_entry *cached_objects; - size_t cached_object_nr, cached_object_alloc; + struct odb_source *inmemory_objects; /* * A fast, rough count of the number of objects in the repository. diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c new file mode 100644 index 0000000000..e004566d76 --- /dev/null +++ b/odb/source-inmemory.c @@ -0,0 +1,382 @@ +#include "git-compat-util.h" +#include "object-file.h" +#include "odb.h" +#include "odb/source-inmemory.h" +#include "odb/streaming.h" +#include "oidtree.h" +#include "repository.h" + +struct inmemory_object { + enum object_type type; + const void *buf; + unsigned long size; +}; + +static const struct inmemory_object *find_cached_object(struct odb_source_inmemory *source, + const struct object_id *oid) +{ + static const struct inmemory_object empty_tree = { + .type = OBJ_TREE, + .buf = "", + }; + const struct inmemory_object *object; + + if (source->objects) { + object = oidtree_get(source->objects, oid); + if (object) + return object; + } + + if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) + return &empty_tree; + + return NULL; +} + +static void populate_object_info(struct odb_source_inmemory *source, + struct object_info *oi, + const struct inmemory_object *object) +{ + if (!oi) + return; + + if (oi->typep) + *(oi->typep) = object->type; + if (oi->sizep) + *(oi->sizep) = object->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid, source->base.odb->repo->hash_algo); + if (oi->contentp) + *oi->contentp = xmemdupz(object->buf, object->size); + if (oi->mtimep) + *oi->mtimep = 0; + oi->whence = OI_CACHED; +} + +static int odb_source_inmemory_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags 
UNUSED) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + const struct inmemory_object *object; + + object = find_cached_object(inmemory, oid); + if (!object) + return -1; + + populate_object_info(inmemory, oi, object); + return 0; +} + +struct odb_read_stream_inmemory { + struct odb_read_stream base; + const unsigned char *buf; + size_t offset; +}; + +static ssize_t odb_read_stream_inmemory_read(struct odb_read_stream *stream, + char *buf, size_t buf_len) +{ + struct odb_read_stream_inmemory *inmemory = + container_of(stream, struct odb_read_stream_inmemory, base); + size_t bytes = buf_len; + + if (buf_len > inmemory->base.size - inmemory->offset) + bytes = inmemory->base.size - inmemory->offset; + + memcpy(buf, inmemory->buf + inmemory->offset, bytes); + inmemory->offset += bytes; + + return bytes; +} + +static int odb_read_stream_inmemory_close(struct odb_read_stream *stream UNUSED) +{ + return 0; +} + +static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct odb_read_stream_inmemory *stream; + const struct inmemory_object *object; + + object = find_cached_object(inmemory, oid); + if (!object) + return -1; + + CALLOC_ARRAY(stream, 1); + stream->base.read = odb_read_stream_inmemory_read; + stream->base.close = odb_read_stream_inmemory_close; + stream->base.size = object->size; + stream->base.type = object->type; + stream->buf = object->buf; + + *out = &stream->base; + return 0; +} + +struct odb_source_inmemory_for_each_object_data { + struct odb_source_inmemory *inmemory; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int odb_source_inmemory_for_each_object_cb(const struct object_id *oid, + void *node_data, void *cb_data) +{ + struct odb_source_inmemory_for_each_object_data *data = cb_data; + struct 
inmemory_object *object = node_data; + + if (data->request) { + struct object_info oi = *data->request; + populate_object_info(data->inmemory, &oi, object); + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +static int odb_source_inmemory_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct odb_source_inmemory_for_each_object_data payload = { + .inmemory = inmemory, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + struct object_id null_oid = { 0 }; + + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) || + (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)) + return 0; + if (!inmemory->objects) + return 0; + + return oidtree_each(inmemory->objects, + opts->prefix ? opts->prefix : &null_oid, opts->prefix_hex_len, + odb_source_inmemory_for_each_object_cb, &payload); +} + +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +static int odb_source_inmemory_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_inmemory_for_each_object(source, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + +static int count_objects_cb(const struct 
object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *cb_data) +{ + unsigned long *counter = cb_data; + (*counter)++; + return 0; +} + +static int odb_source_inmemory_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags UNUSED, + unsigned long *out) +{ + struct odb_for_each_object_options opts = { 0 }; + *out = 0; + return odb_source_inmemory_for_each_object(source, NULL, count_objects_cb, + out, &opts); +} + +static int odb_source_inmemory_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid UNUSED, + enum odb_write_object_flags flags UNUSED) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct inmemory_object *object; + + hash_object_file(source->odb->repo->hash_algo, buf, len, type, oid); + + if (!inmemory->objects) { + CALLOC_ARRAY(inmemory->objects, 1); + oidtree_init(inmemory->objects); + } else if (oidtree_contains(inmemory->objects, oid)) { + return 0; + } + + CALLOC_ARRAY(object, 1); + object->size = len; + object->type = type; + object->buf = xmemdupz(buf, len); + + oidtree_insert(inmemory->objects, oid, object); + + return 0; +} + +static int odb_source_inmemory_write_object_stream(struct odb_source *source, + struct odb_write_stream *stream, + size_t len, + struct object_id *oid) +{ + char buf[16384]; + size_t total_read = 0; + char *data; + int ret; + + CALLOC_ARRAY(data, len); + while (!stream->is_finished) { + ssize_t bytes_read; + + bytes_read = odb_write_stream_read(stream, buf, sizeof(buf)); + if (total_read + bytes_read > len) { + ret = error("object stream yielded more bytes than expected"); + goto out; + } + + memcpy(data + total_read, buf, bytes_read); + total_read += bytes_read; + } + + if (total_read != len) { + ret = error("object stream yielded less bytes than expected"); + goto out; + } + + ret = odb_source_inmemory_write_object(source, data, len, OBJ_BLOB, oid, 
+ NULL, 0); + if (ret < 0) + goto out; + +out: + free(data); + return ret; +} + +static int odb_source_inmemory_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + if (find_cached_object(inmemory, oid)) + return 1; + return 0; +} + +static int odb_source_inmemory_begin_transaction(struct odb_source *source UNUSED, + struct odb_transaction **out UNUSED) +{ + return error("in-memory source does not support transactions"); +} + +static int odb_source_inmemory_read_alternates(struct odb_source *source UNUSED, + struct strvec *out UNUSED) +{ + return 0; +} + +static int odb_source_inmemory_write_alternate(struct odb_source *source UNUSED, + const char *alternate UNUSED) +{ + return error("in-memory source does not support alternates"); +} + +static void odb_source_inmemory_close(struct odb_source *source UNUSED) +{ +} + +static void odb_source_inmemory_reprepare(struct odb_source *source UNUSED) +{ +} + +static int inmemory_object_free(const struct object_id *oid UNUSED, + void *node_data, + void *cb_data UNUSED) +{ + struct inmemory_object *object = node_data; + free((void *) object->buf); + free(object); + return 0; +} + +static void odb_source_inmemory_free(struct odb_source *source) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + + if (inmemory->objects) { + struct object_id null_oid = { 0 }; + + oidtree_each(inmemory->objects, &null_oid, 0, + inmemory_object_free, NULL); + oidtree_clear(inmemory->objects); + free(inmemory->objects); + } + + free(inmemory->base.path); + free(inmemory); +} + +struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) +{ + struct odb_source_inmemory *source; + + CALLOC_ARRAY(source, 1); + odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); + + source->base.free = odb_source_inmemory_free; + source->base.close = odb_source_inmemory_close; + 
source->base.reprepare = odb_source_inmemory_reprepare; + source->base.read_object_info = odb_source_inmemory_read_object_info; + source->base.read_object_stream = odb_source_inmemory_read_object_stream; + source->base.for_each_object = odb_source_inmemory_for_each_object; + source->base.find_abbrev_len = odb_source_inmemory_find_abbrev_len; + source->base.count_objects = odb_source_inmemory_count_objects; + source->base.write_object = odb_source_inmemory_write_object; + source->base.write_object_stream = odb_source_inmemory_write_object_stream; + source->base.freshen_object = odb_source_inmemory_freshen_object; + source->base.begin_transaction = odb_source_inmemory_begin_transaction; + source->base.read_alternates = odb_source_inmemory_read_alternates; + source->base.write_alternate = odb_source_inmemory_write_alternate; + + return source; +} diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h new file mode 100644 index 0000000000..a88fc2e320 --- /dev/null +++ b/odb/source-inmemory.h @@ -0,0 +1,33 @@ +#ifndef ODB_SOURCE_INMEMORY_H +#define ODB_SOURCE_INMEMORY_H + +#include "odb/source.h" + +struct oidtree; + +/* + * An in-memory source that you can write objects to that shall be made + * available for reading, but that shouldn't ever be persisted to disk. Note + * that any objects written to this source will be stored in memory, so the + * number of objects you can store is limited by available system memory. + */ +struct odb_source_inmemory { + struct odb_source base; + struct oidtree *objects; +}; + +/* Create a new in-memory object database source. */ +struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb); + +/* + * Cast the given object database source to the in-memory backend. This will + * cause a BUG in case the source doesn't use this backend. 
+ */ +static inline struct odb_source_inmemory *odb_source_inmemory_downcast(struct odb_source *source) +{ + if (source->type != ODB_SOURCE_INMEMORY) + BUG("trying to downcast source of type '%d' to in-memory", source->type); + return container_of(source, struct odb_source_inmemory, base); +} + +#endif diff --git a/odb/source.h b/odb/source.h index f706e0608a..0a440884e4 100644 --- a/odb/source.h +++ b/odb/source.h @@ -13,6 +13,9 @@ enum odb_source_type { /* The "files" backend that uses loose objects and packfiles. */ ODB_SOURCE_FILES, + + /* The "in-memory" backend that stores objects in memory. */ + ODB_SOURCE_INMEMORY, }; struct object_id; diff --git a/oidtree.c b/oidtree.c index ab9fe7ec7a..e43f18026e 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,9 +6,15 @@ #include "oidtree.h" #include "hash.h" +struct oidtree_node { + struct cb_node base; + struct object_id key; + void *data; +}; + void oidtree_init(struct oidtree *ot) { - cb_init(&ot->tree); + cb_init(&ot->tree, offsetof(struct oidtree_node, key)); mem_pool_init(&ot->mem_pool, 0); } @@ -20,22 +26,22 @@ void oidtree_clear(struct oidtree *ot) } } -void oidtree_insert(struct oidtree *ot, const struct object_id *oid) +struct oidtree_data { + struct object_id oid; +}; + +void oidtree_insert(struct oidtree *ot, const struct object_id *oid, + void *data) { - struct cb_node *on; - struct object_id k; + struct oidtree_node *on; + struct cb_node *node; if (!oid->algo) BUG("oidtree_insert requires oid->algo"); - on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid)); - - /* - * Clear the padding and copy the result in separate steps to - * respect the 4-byte alignment needed by struct object_id. - */ - oidcpy(&k, oid); - memcpy(on->k, &k, sizeof(k)); + on = mem_pool_alloc(&ot->mem_pool, sizeof(*on)); + oidcpy(&on->key, oid); + on->data = data; /* * n.b. Current callers won't get us duplicates, here. 
If a @@ -43,13 +49,19 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) * that won't be freed until oidtree_clear. Currently it's not * worth maintaining a free list */ - cb_insert(&ot->tree, on, sizeof(*oid)); + node = cb_insert(&ot->tree, &on->base, sizeof(*oid)); + if (node) { + struct oidtree_node *preexisting = container_of(node, struct oidtree_node, base); + preexisting->data = data; + } } -bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) +static struct oidtree_node *oidtree_lookup(struct oidtree *ot, + const struct object_id *oid) { struct object_id k; size_t klen = sizeof(k); + struct cb_node *node; oidcpy(&k, oid); @@ -60,7 +72,20 @@ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < offsetof(struct object_id, algo)); - return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen); + node = cb_lookup(&ot->tree, (const uint8_t *)&k, klen); + return node ? container_of(node, struct oidtree_node, base) : NULL; +} + +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *node = oidtree_lookup(ot, oid); + return node ? 1 : 0; +} + +void *oidtree_get(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *node = oidtree_lookup(ot, oid); + return node ? node->data : NULL; } struct oidtree_each_data { @@ -73,21 +98,18 @@ struct oidtree_each_data { static int iter(struct cb_node *n, void *cb_data) { + struct oidtree_node *node = container_of(n, struct oidtree_node, base); struct oidtree_each_data *data = cb_data; - struct object_id k; - /* Copy to provide 4-byte alignment needed by struct object_id. 
*/ - memcpy(&k, n->k, sizeof(k)); - - if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) + if (data->algo != GIT_HASH_UNKNOWN && data->algo != node->key.algo) return 0; if (data->last_nibble_at) { - if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) + if ((node->key.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) return 0; } - return data->cb(&k, data->cb_data); + return data->cb(&node->key, node->data, data->cb_data); } int oidtree_each(struct oidtree *ot, const struct object_id *prefix, diff --git a/oidtree.h b/oidtree.h index 2b7bad2e60..baa5a436ea 100644 --- a/oidtree.h +++ b/oidtree.h @@ -29,18 +29,26 @@ void oidtree_init(struct oidtree *ot); */ void oidtree_clear(struct oidtree *ot); -/* Insert the object ID into the tree. */ -void oidtree_insert(struct oidtree *ot, const struct object_id *oid); +/* + * Insert the object ID into the tree and store the given pointer alongside + * with it. The data pointer of any preexisting entry will be overwritten. + */ +void oidtree_insert(struct oidtree *ot, const struct object_id *oid, + void *data); /* Check whether the tree contains the given object ID. */ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); +/* Get the payload stored with the given object ID. */ +void *oidtree_get(struct oidtree *ot, const struct object_id *oid); + /* * Callback function used for `oidtree_each()`. Returning a non-zero exit code * will cause iteration to stop. The exit code will be propagated to the caller * of `oidtree_each()`. 
*/ typedef int (*oidtree_each_cb)(const struct object_id *oid, + void *node_data, void *cb_data); /* diff --git a/t/meson.build b/t/meson.build index 69bd8fcc6c..2af8d01279 100644 --- a/t/meson.build +++ b/t/meson.build @@ -6,6 +6,7 @@ clar_test_suites = [ 'unit-tests/u-hashmap.c', 'unit-tests/u-list-objects-filter-options.c', 'unit-tests/u-mem-pool.c', + 'unit-tests/u-odb-inmemory.c', 'unit-tests/u-oid-array.c', 'unit-tests/u-oidmap.c', 'unit-tests/u-oidtree.c', diff --git a/t/unit-tests/u-odb-inmemory.c b/t/unit-tests/u-odb-inmemory.c new file mode 100644 index 0000000000..482502ef4b --- /dev/null +++ b/t/unit-tests/u-odb-inmemory.c @@ -0,0 +1,313 @@ +#include "unit-test.h" +#include "hex.h" +#include "odb/source-inmemory.h" +#include "odb/streaming.h" +#include "oidset.h" +#include "repository.h" +#include "strbuf.h" + +#define RANDOM_OID "da39a3ee5e6b4b0d3255bfef95601890afd80709" +#define FOOBAR_OID "f6ea0495187600e7b2288c8ac19c5886383a4632" + +static struct repository repo = { + .hash_algo = &hash_algos[GIT_HASH_SHA1], +}; +static struct object_database *odb; + +static void cl_assert_object_info(struct odb_source_inmemory *source, + const struct object_id *oid, + enum object_type expected_type, + const char *expected_content) +{ + enum object_type actual_type; + unsigned long actual_size; + void *actual_content; + struct object_info oi = { + .typep = &actual_type, + .sizep = &actual_size, + .contentp = &actual_content, + }; + + cl_must_pass(odb_source_read_object_info(&source->base, oid, &oi, 0)); + cl_assert_equal_u(actual_size, strlen(expected_content)); + cl_assert_equal_u(actual_type, expected_type); + cl_assert_equal_s((char *) actual_content, expected_content); + + free(actual_content); +} + +void test_odb_inmemory__initialize(void) +{ + odb = odb_new(&repo, "", ""); +} + +void test_odb_inmemory__cleanup(void) +{ + odb_free(odb); +} + +void test_odb_inmemory__new(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + 
cl_assert_equal_i(source->base.type, ODB_SOURCE_INMEMORY); + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_missing_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id oid; + const char *end; + + cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo)); + cl_must_fail(odb_source_read_object_info(&source->base, &oid, NULL, 0)); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_empty_tree(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + cl_assert_object_info(source, repo.hash_algo->empty_tree, OBJ_TREE, ""); + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_written_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + const char data[] = "foobar"; + struct object_id written_oid; + + cl_must_pass(odb_source_write_object(&source->base, data, strlen(data), + OBJ_BLOB, &written_oid, NULL, 0)); + cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID); + cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar"); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_stream_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_read_stream *stream; + struct object_id written_oid; + const char data[] = "foobar"; + char buf[3] = { 0 }; + + cl_must_pass(odb_source_write_object(&source->base, data, strlen(data), + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_must_pass(odb_source_read_object_stream(&stream, &source->base, + &written_oid)); + cl_assert_equal_i(stream->type, OBJ_BLOB); + cl_assert_equal_u(stream->size, 6); + + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "fo"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "ob"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "ar"); + 
cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 0); + + odb_read_stream_close(stream); + odb_source_free(&source->base); +} + +static int add_one_object(const struct object_id *oid, + struct object_info *oi UNUSED, + void *payload) +{ + struct oidset *actual_oids = payload; + cl_must_pass(oidset_insert(actual_oids, oid)); + return 0; +} + +void test_odb_inmemory__for_each_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_for_each_object_options opts = { 0 }; + struct oidset expected_oids = OIDSET_INIT; + struct oidset actual_oids = OIDSET_INIT; + struct strbuf buf = STRBUF_INIT; + + cl_must_pass(odb_source_for_each_object(&source->base, NULL, + add_one_object, &actual_oids, &opts)); + cl_assert_equal_u(oidset_size(&actual_oids), 0); + + for (int i = 0; i < 10; i++) { + struct object_id written_oid; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%d", i); + + cl_must_pass(odb_source_write_object(&source->base, buf.buf, buf.len, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(oidset_insert(&expected_oids, &written_oid)); + } + + cl_must_pass(odb_source_for_each_object(&source->base, NULL, + add_one_object, &actual_oids, &opts)); + cl_assert_equal_b(oidset_equal(&expected_oids, &actual_oids), true); + + odb_source_free(&source->base); + oidset_clear(&expected_oids); + oidset_clear(&actual_oids); + strbuf_release(&buf); +} + +static int abort_after_two_objects(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *payload) +{ + unsigned *counter = payload; + (*counter)++; + if (*counter == 2) + return 123; + return 0; +} + +void test_odb_inmemory__for_each_object_can_abort_iteration(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_for_each_object_options opts = { 0 }; + struct object_id written_oid; + unsigned counter = 0; + + cl_must_pass(odb_source_write_object(&source->base, "1", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + 
cl_must_pass(odb_source_write_object(&source->base, "2", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "3", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_assert_equal_i(odb_source_for_each_object(&source->base, NULL, + abort_after_two_objects, + &counter, &opts), + 123); + cl_assert_equal_u(counter, 2); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__count_objects(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id written_oid; + unsigned long count; + + cl_must_pass(odb_source_count_objects(&source->base, 0, &count)); + cl_assert_equal_u(count, 0); + + cl_must_pass(odb_source_write_object(&source->base, "1", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "2", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "3", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_must_pass(odb_source_count_objects(&source->base, 0, &count)); + cl_assert_equal_u(count, 3); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__find_abbrev_len(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id oid1, oid2; + unsigned abbrev_len; + + /* + * The two blobs we're about to write share the first 10 hex characters + * of their object IDs ("a09f43dc45"), so at least 11 characters are + * needed to tell them apart: + * + * "368317" -> a09f43dc4562d45115583f5094640ae237df55f7 + * "514796" -> a09f43dc45fef837235eb7e6b1a6ca5e169a3981 + * + * With only one blob written we expect a length of 4. 
+ */ + cl_must_pass(odb_source_write_object(&source->base, "368317", strlen("368317"), + OBJ_BLOB, &oid1, NULL, 0)); + cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4, + &abbrev_len)); + cl_assert_equal_u(abbrev_len, 4); + + /* + * With both objects present, the shared 10-character prefix means we + * need at least 11 characters to uniquely identify either object. + */ + cl_must_pass(odb_source_write_object(&source->base, "514796", strlen("514796"), + OBJ_BLOB, &oid2, NULL, 0)); + cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4, + &abbrev_len)); + cl_assert_equal_u(abbrev_len, 11); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__freshen_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id written_oid; + struct object_id oid; + const char *end; + + cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo)); + cl_assert_equal_i(odb_source_freshen_object(&source->base, &oid), 0); + + cl_must_pass(odb_source_write_object(&source->base, "foobar", + strlen("foobar"), OBJ_BLOB, + &written_oid, NULL, 0)); + cl_assert_equal_i(odb_source_freshen_object(&source->base, + &written_oid), 1); + + odb_source_free(&source->base); +} + +struct membuf_write_stream { + struct odb_write_stream base; + const char *buf; + size_t offset; + size_t size; +}; + +static ssize_t membuf_write_stream_read(struct odb_write_stream *stream, + unsigned char *buf, size_t len) +{ + struct membuf_write_stream *s = container_of(stream, struct membuf_write_stream, base); + size_t chunk_size = 2; + + if (chunk_size > len) + chunk_size = len; + if (chunk_size > s->size - s->offset) + chunk_size = s->size - s->offset; + + memcpy(buf, s->buf + s->offset, chunk_size); + + s->offset += chunk_size; + if (s->offset == s->size) + s->base.is_finished = 1; + + return chunk_size; +} + +void test_odb_inmemory__write_object_stream(void) +{ + struct odb_source_inmemory *source = 
odb_source_inmemory_new(odb); + const char data[] = "foobar"; + struct membuf_write_stream stream = { + .base.read = membuf_write_stream_read, + .buf = data, + .size = strlen(data), + }; + struct object_id written_oid; + + cl_must_pass(odb_source_write_object_stream(&source->base, &stream.base, + strlen(data), &written_oid)); + cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID); + cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar"); + + odb_source_free(&source->base); +} diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index d4d05c7dc3..f0d5ebb733 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -19,7 +19,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n) for (size_t i = 0; i < n; i++) { struct object_id oid; cl_parse_any_oid(hexes[i], &oid); - oidtree_insert(ot, &oid); + oidtree_insert(ot, &oid, NULL); } return 0; } @@ -38,9 +38,9 @@ struct expected_hex_iter { const char *query; }; -static int check_each_cb(const struct object_id *oid, void *data) +static int check_each_cb(const struct object_id *oid, void *node_data UNUSED, void *cb_data) { - struct expected_hex_iter *hex_iter = data; + struct expected_hex_iter *hex_iter = cb_data; struct object_id expected; cl_assert(hex_iter->i < hex_iter->expected_hexes.nr); @@ -105,3 +105,23 @@ void test_oidtree__each(void) check_each(&ot, "32100", "321", NULL); check_each(&ot, "32", "320", "321", NULL); } + +void test_oidtree__insert_overwrites_data(void) +{ + struct object_id oid; + struct oidtree ot; + int a, b; + + cl_parse_any_oid("1", &oid); + + oidtree_init(&ot); + + oidtree_insert(&ot, &oid, NULL); + cl_assert_equal_p(oidtree_get(&ot, &oid), NULL); + oidtree_insert(&ot, &oid, &a); + cl_assert_equal_p(oidtree_get(&ot, &oid), &a); + oidtree_insert(&ot, &oid, &b); + cl_assert_equal_p(oidtree_get(&ot, &oid), &b); + + oidtree_clear(&ot); +}