diff --git a/Makefile b/Makefile index b31ecb0756..0976a69b4c 100644 --- a/Makefile +++ b/Makefile @@ -1217,6 +1217,7 @@ LIB_OBJS += odb.o LIB_OBJS += odb/source.o LIB_OBJS += odb/source-files.o LIB_OBJS += odb/source-inmemory.o +LIB_OBJS += odb/source-loose.o LIB_OBJS += odb/streaming.o LIB_OBJS += odb/transaction.o LIB_OBJS += oid-array.o diff --git a/builtin/cat-file.c b/builtin/cat-file.c index fa45f774d7..04b64006a5 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -891,8 +891,9 @@ static void batch_each_object(struct batch_options *opt, */ odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) { - int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, - &payload, &opts); + struct odb_source_files *files = odb_source_files_downcast(source); + int ret = odb_source_for_each_object(&files->loose->base, NULL, batch_one_object_oi, + &payload, &opts); if (ret) break; } diff --git a/builtin/gc.c b/builtin/gc.c index 84a66d3240..c26c93ee0f 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -466,6 +466,7 @@ out: static int too_many_loose_objects(int limit) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); /* * This is weird, but stems from legacy behaviour: the GC auto * threshold was always essentially interpreted as if it was rounded up @@ -474,9 +475,8 @@ static int too_many_loose_objects(int limit) int auto_threshold = DIV_ROUND_UP(limit, 256) * 256; unsigned long loose_count; - if (odb_source_loose_count_objects(the_repository->objects->sources, - ODB_COUNT_OBJECTS_APPROXIMATE, - &loose_count) < 0) + if (odb_source_count_objects(&files->loose->base, ODB_COUNT_OBJECTS_APPROXIMATE, + &loose_count) < 0) return 0; return loose_count > auto_threshold; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index fe9fbecb30..50675481e1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1750,9 
+1750,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, * skip the local object source. */ struct odb_source *source = the_repository->objects->sources->next; - for (; source; source = source->next) - if (odb_source_loose_has_object(source, oid)) + for (; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0)) return 0; + } } /* @@ -4135,9 +4137,11 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type struct odb_source *source = the_repository->objects->sources; int found = 0; - for (; !found && source; source = source->next) - if (odb_source_loose_has_object(source, oid)) + for (; !found && source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0)) found = 1; + } /* * If a traversed tree has a missing blob then we want diff --git a/http-walker.c b/http-walker.c index f252de089f..b58a3b2a92 100644 --- a/http-walker.c +++ b/http-walker.c @@ -539,8 +539,9 @@ static int fetch_object(struct walker *walker, const struct object_id *oid) } else if (!oideq(&obj_req->oid, &req->real_oid)) { ret = error("File %s has bad hash", hex); } else if (req->rename < 0) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); struct strbuf buf = STRBUF_INIT; - odb_loose_path(the_repository->objects->sources, &buf, &req->oid); + odb_loose_path(files->loose, &buf, &req->oid); ret = error("unable to write sha1 filename %s", buf.buf); strbuf_release(&buf); } diff --git a/http.c b/http.c index ea9b16861b..3fcc012233 100644 --- a/http.c +++ b/http.c @@ -2826,6 +2826,7 @@ static size_t fwrite_sha1_file(char *ptr, size_t eltsize, size_t nmemb, struct http_object_request *new_http_object_request(const char *base_url, const struct object_id *oid) { + struct odb_source_files 
*files = odb_source_files_downcast(the_repository->objects->sources); char *hex = oid_to_hex(oid); struct strbuf filename = STRBUF_INIT; struct strbuf prevfile = STRBUF_INIT; @@ -2840,7 +2841,7 @@ struct http_object_request *new_http_object_request(const char *base_url, oidcpy(&freq->oid, oid); freq->localfile = -1; - odb_loose_path(the_repository->objects->sources, &filename, oid); + odb_loose_path(files->loose, &filename, oid); strbuf_addf(&freq->tmpfile, "%s.temp", filename.buf); strbuf_addf(&prevfile, "%s.prev", filename.buf); @@ -2966,6 +2967,7 @@ void process_http_object_request(struct http_object_request *freq) int finish_http_object_request(struct http_object_request *freq) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); struct stat st; struct strbuf filename = STRBUF_INIT; @@ -2992,7 +2994,7 @@ int finish_http_object_request(struct http_object_request *freq) unlink_or_warn(freq->tmpfile.buf); return -1; } - odb_loose_path(the_repository->objects->sources, &filename, &freq->oid); + odb_loose_path(files->loose, &filename, &freq->oid); freq->rename = finalize_object_file(the_repository, freq->tmpfile.buf, filename.buf); strbuf_release(&filename); diff --git a/loose.c b/loose.c index f7a3dd1a72..0b626c1b85 100644 --- a/loose.c +++ b/loose.c @@ -46,38 +46,36 @@ static int insert_oid_pair(kh_oid_map_t *map, const struct object_id *key, const return 1; } -static int insert_loose_map(struct odb_source *source, +static int insert_loose_map(struct odb_source_loose *loose, const struct object_id *oid, const struct object_id *compat_oid) { - struct odb_source_files *files = odb_source_files_downcast(source); - struct loose_object_map *map = files->loose->map; + struct loose_object_map *map = loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(files->loose->cache, compat_oid, NULL); + 
oidtree_insert(loose->cache, compat_oid, NULL); return inserted; } -static int load_one_loose_object_map(struct repository *repo, struct odb_source *source) +static int load_one_loose_object_map(struct repository *repo, struct odb_source_loose *loose) { - struct odb_source_files *files = odb_source_files_downcast(source); struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!files->loose->map) - loose_object_map_init(&files->loose->map); - if (!files->loose->cache) { - ALLOC_ARRAY(files->loose->cache, 1); - oidtree_init(files->loose->cache); + if (!loose->map) + loose_object_map_init(&loose->map); + if (!loose->cache) { + ALLOC_ARRAY(loose->cache, 1); + oidtree_init(loose->cache); } - insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); - insert_loose_map(source, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob); - insert_loose_map(source, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid); + insert_loose_map(loose, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); + insert_loose_map(loose, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob); + insert_loose_map(loose, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid); repo_common_path_replace(repo, &path, "objects/loose-object-idx"); fp = fopen(path.buf, "rb"); @@ -97,7 +95,7 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source parse_oid_hex_algop(p, &compat_oid, &p, repo->compat_hash_algo) || p != buf.buf + buf.len) goto err; - insert_loose_map(source, &oid, &compat_oid); + insert_loose_map(loose, &oid, &compat_oid); } strbuf_release(&buf); @@ -119,7 +117,8 @@ int repo_read_loose_object_map(struct repository *repo) odb_prepare_alternates(repo->objects); for (source = repo->objects->sources; source; source = source->next) { - if (load_one_loose_object_map(repo, source) < 0) { + struct odb_source_files *files = odb_source_files_downcast(source); + if 
(load_one_loose_object_map(repo, files->loose) < 0) { return -1; } } @@ -171,7 +170,7 @@ errout: return -1; } -static int write_one_object(struct odb_source *source, +static int write_one_object(struct odb_source_loose *loose, const struct object_id *oid, const struct object_id *compat_oid) { @@ -180,7 +179,7 @@ static int write_one_object(struct odb_source *source, struct stat st; struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; - strbuf_addf(&path, "%s/loose-object-idx", source->path); + strbuf_addf(&path, "%s/loose-object-idx", loose->base.path); hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1); fd = open(path.buf, O_WRONLY | O_CREAT | O_APPEND, 0666); @@ -196,7 +195,7 @@ static int write_one_object(struct odb_source *source, goto errout; if (close(fd)) goto errout; - adjust_shared_perm(source->odb->repo, path.buf); + adjust_shared_perm(loose->base.odb->repo, path.buf); rollback_lock_file(&lock); strbuf_release(&buf); strbuf_release(&path); @@ -210,18 +209,18 @@ errout: return -1; } -int repo_add_loose_object_map(struct odb_source *source, +int repo_add_loose_object_map(struct odb_source_loose *loose, const struct object_id *oid, const struct object_id *compat_oid) { int inserted = 0; - if (!should_use_loose_object_map(source->odb->repo)) + if (!should_use_loose_object_map(loose->base.odb->repo)) return 0; - inserted = insert_loose_map(source, oid, compat_oid); + inserted = insert_loose_map(loose, oid, compat_oid); if (inserted) - return write_one_object(source, oid, compat_oid); + return write_one_object(loose, oid, compat_oid); return 0; } diff --git a/loose.h b/loose.h index 6af1702973..6c9b3f4571 100644 --- a/loose.h +++ b/loose.h @@ -4,7 +4,7 @@ #include "khash.h" struct repository; -struct odb_source; +struct odb_source_loose; struct loose_object_map { kh_oid_map_t *to_compat; @@ -17,7 +17,7 @@ int repo_loose_object_map_oid(struct repository *repo, const struct object_id *src, const struct git_hash_algo *dest_algo, struct 
object_id *dest); -int repo_add_loose_object_map(struct odb_source *source, +int repo_add_loose_object_map(struct odb_source_loose *loose, const struct object_id *oid, const struct object_id *compat_oid); int repo_read_loose_object_map(struct repository *repo); diff --git a/meson.build b/meson.build index 064fe2e2f1..3247697f74 100644 --- a/meson.build +++ b/meson.build @@ -405,6 +405,7 @@ libgit_sources = [ 'odb/source.c', 'odb/source-files.c', 'odb/source-inmemory.c', + 'odb/source-loose.c', 'odb/streaming.c', 'odb/transaction.c', 'oid-array.c', diff --git a/object-file.c b/object-file.c index 90f995d000..bce941874e 100644 --- a/object-file.c +++ b/object-file.c @@ -22,7 +22,6 @@ #include "odb.h" #include "odb/streaming.h" #include "odb/transaction.h" -#include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" @@ -31,12 +30,6 @@ #include "tempfile.h" #include "tmp-objdir.h" -/* The maximum size for an object header. */ -#define MAX_HEADER_LEN 32 - -static struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid); - static int get_conv_flags(unsigned flags) { if (flags & INDEX_RENORMALIZE) @@ -61,14 +54,14 @@ static void fill_loose_path(struct strbuf *buf, } } -const char *odb_loose_path(struct odb_source *source, +const char *odb_loose_path(struct odb_source_loose *loose, struct strbuf *buf, const struct object_id *oid) { strbuf_reset(buf); - strbuf_addstr(buf, source->path); + strbuf_addstr(buf, loose->base.path); strbuf_addch(buf, '/'); - fill_loose_path(buf, oid, source->odb->repo->hash_algo); + fill_loose_path(buf, oid, loose->base.odb->repo->hash_algo); return buf->buf; } @@ -94,21 +87,6 @@ int check_and_freshen_file(const char *fn, int freshen) return 1; } -static int check_and_freshen_source(struct odb_source *source, - const struct object_id *oid, - int freshen) -{ - static struct strbuf path = STRBUF_INIT; - odb_loose_path(source, &path, oid); - return check_and_freshen_file(path.buf, freshen); 
-} - -int odb_source_loose_has_object(struct odb_source *source, - const struct object_id *oid) -{ - return check_and_freshen_source(source, oid, 0); -} - int format_object_header(char *str, size_t size, enum object_type type, size_t objsize) { @@ -164,34 +142,6 @@ int stream_object_signature(struct repository *r, return !oideq(oid, &real_oid) ? -1 : 0; } -/* - * Find "oid" as a loose object in given source, open the object and return its - * file descriptor. Returns the file descriptor on success, negative on failure. - * - * The "path" out-parameter will give the path of the object we found (if any). - * Note that it may point to static storage and is only valid until another - * call to stat_loose_object(). - */ -static int open_loose_object(struct odb_source_loose *loose, - const struct object_id *oid, const char **path) -{ - static struct strbuf buf = STRBUF_INIT; - int fd; - - *path = odb_loose_path(loose->source, &buf, oid); - fd = git_open(*path); - if (fd >= 0) - return fd; - - return -1; -} - -static int quick_has_loose(struct odb_source_loose *loose, - const struct object_id *oid) -{ - return !!oidtree_contains(odb_source_loose_cache(loose->source, oid), oid); -} - /* * Map and close the given loose object fd. The path argument is used for * error reporting. @@ -215,42 +165,11 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -static void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - const char *p; - int fd = open_loose_object(files->loose, oid, &p); - - if (fd < 0) - return NULL; - return map_fd(fd, p, size); -} - -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; - -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. 
- * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -280,9 +199,9 @@ static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, return ULHR_TOO_LONG; } -static void *unpack_loose_rest(git_zstream *stream, - void *buffer, unsigned long size, - const struct object_id *oid) +void *unpack_loose_rest(git_zstream *stream, + void *buffer, unsigned long size, + const struct object_id *oid) { size_t bytes = strlen(buffer) + 1, n; unsigned char *buf = xmallocz(size); @@ -340,7 +259,7 @@ static void *unpack_loose_rest(git_zstream *stream, * too permissive for what we want to check. So do an anal * object header parse by hand. */ -static int parse_loose_header(const char *hdr, struct object_info *oi) +int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -396,170 +315,6 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -static int read_object_info_from_path(struct odb_source *source, - const char *path, - const struct object_id *oid, - struct object_info *oi, - enum object_info_flags flags) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - int ret; - int fd; - unsigned long mapsize; - void *map = NULL; - git_zstream stream, *stream_to_end = NULL; - char hdr[MAX_HEADER_LEN]; - unsigned long size_scratch; - enum object_type type_scratch; - struct stat st; - - /* - * If we don't care about type or size, then we don't - * need to look inside the object at all. 
Note that we - * do not optimize out the stat call, even if the - * caller doesn't care about the disk-size, since our - * return value implicitly indicates whether the - * object even exists. - */ - if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) { - struct stat st; - - if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) { - ret = quick_has_loose(files->loose, oid) ? 0 : -1; - goto out; - } - - if (lstat(path, &st) < 0) { - ret = -1; - goto out; - } - - if (oi) { - if (oi->disk_sizep) - *oi->disk_sizep = st.st_size; - if (oi->mtimep) - *oi->mtimep = st.st_mtime; - } - - ret = 0; - goto out; - } - - fd = git_open(path); - if (fd < 0) { - if (errno != ENOENT) - error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); - ret = -1; - goto out; - } - - if (fstat(fd, &st)) { - close(fd); - ret = -1; - goto out; - } - - mapsize = xsize_t(st.st_size); - if (!mapsize) { - close(fd); - ret = error(_("object file %s is empty"), path); - goto out; - } - - map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - if (!map) { - ret = -1; - goto out; - } - - if (oi->disk_sizep) - *oi->disk_sizep = mapsize; - if (oi->mtimep) - *oi->mtimep = st.st_mtime; - - stream_to_end = &stream; - - switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) { - case ULHR_OK: - if (!oi->sizep) - oi->sizep = &size_scratch; - if (!oi->typep) - oi->typep = &type_scratch; - - if (parse_loose_header(hdr, oi) < 0) { - ret = error(_("unable to parse %s header"), oid_to_hex(oid)); - goto corrupt; - } - - if (*oi->typep < 0) - die(_("invalid object type")); - - if (oi->contentp) { - *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); - if (!*oi->contentp) { - ret = -1; - goto corrupt; - } - } - - break; - case ULHR_BAD: - ret = error(_("unable to unpack %s header"), - oid_to_hex(oid)); - goto corrupt; - case ULHR_TOO_LONG: - ret = error(_("header for %s too long, exceeds %d bytes"), - oid_to_hex(oid), MAX_HEADER_LEN); - 
goto corrupt; - } - - ret = 0; - -corrupt: - if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT)) - die(_("loose object %s (stored in %s) is corrupt"), - oid_to_hex(oid), path); - -out: - if (stream_to_end) - git_inflate_end(stream_to_end); - if (map) - munmap(map, mapsize); - if (oi) { - if (oi->sizep == &size_scratch) - oi->sizep = NULL; - if (oi->typep == &type_scratch) - oi->typep = NULL; - if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); - if (!ret) - oi->whence = OI_LOOSE; - } - - return ret; -} - -int odb_source_loose_read_object_info(struct odb_source *source, - const struct object_id *oid, - struct object_info *oi, - enum object_info_flags flags) -{ - static struct strbuf buf = STRBUF_INIT; - - /* - * The second read shouldn't cause new loose objects to show up, unless - * there was a race condition with a secondary process. We don't care - * about this case though, so we simply skip reading loose objects a - * second time. - */ - if (flags & OBJECT_INFO_SECOND_READ) - return -1; - - odb_loose_path(source, &buf, oid); - return read_object_info_from_path(source, buf.buf, oid, oi, flags); -} - static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, @@ -571,10 +326,10 @@ static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_c git_hash_final_oid(oid, c); } -static void write_object_file_prepare(const struct git_hash_algo *algo, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - char *hdr, int *hdrlen) +void write_object_file_prepare(const struct git_hash_algo *algo, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + char *hdr, int *hdrlen) { struct git_hash_ctx c; @@ -820,14 +575,14 @@ static void flush_loose_object_transaction(struct odb_transaction_files *transac } /* Finalize a file on disk, and close it. 
*/ -static void close_loose_object(struct odb_source *source, +static void close_loose_object(struct odb_source_loose *loose, int fd, const char *filename) { - if (source->will_destroy) + if (loose->base.will_destroy) goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_transaction(source->odb->transaction, fd, filename); + fsync_loose_object_transaction(loose->base.odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -896,7 +651,7 @@ static int create_tmpfile(struct repository *repo, * Returns a "fd", which should later be provided to * end_loose_object_common(). */ -static int start_loose_object_common(struct odb_source *source, +static int start_loose_object_common(struct odb_source_loose *loose, struct strbuf *tmp_file, const char *filename, unsigned flags, git_zstream *stream, @@ -904,18 +659,18 @@ static int start_loose_object_common(struct odb_source *source, struct git_hash_ctx *c, struct git_hash_ctx *compat_c, char *hdr, int hdrlen) { - const struct git_hash_algo *algo = source->odb->repo->hash_algo; - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; + const struct git_hash_algo *algo = loose->base.odb->repo->hash_algo; + const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo; int fd; - fd = create_tmpfile(source->odb->repo, tmp_file, filename); + fd = create_tmpfile(loose->base.odb->repo, tmp_file, filename); if (fd < 0) { if (flags & ODB_WRITE_OBJECT_SILENT) return -1; else if (errno == EACCES) return error(_("insufficient permission for adding " "an object to repository database %s"), - source->path); + loose->base.path); else return error_errno( _("unable to create temporary file")); @@ -945,14 +700,14 @@ static int start_loose_object_common(struct odb_source *source, * Common steps for the inner git_deflate() loop for writing loose * objects. Returns what git_deflate() returns. 
*/ -static int write_loose_object_common(struct odb_source *source, +static int write_loose_object_common(struct odb_source_loose *loose, struct git_hash_ctx *c, struct git_hash_ctx *compat_c, git_zstream *stream, const int flush, unsigned char *in0, const int fd, unsigned char *compressed, const size_t compressed_len) { - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; + const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo; int ret; ret = git_deflate(stream, flush ? Z_FINISH : 0); @@ -973,12 +728,12 @@ static int write_loose_object_common(struct odb_source *source, * - End the compression of zlib stream. * - Get the calculated oid to "oid". */ -static int end_loose_object_common(struct odb_source *source, +static int end_loose_object_common(struct odb_source_loose *loose, struct git_hash_ctx *c, struct git_hash_ctx *compat_c, git_zstream *stream, struct object_id *oid, struct object_id *compat_oid) { - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; + const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo; int ret; ret = git_deflate_end_gently(stream); @@ -991,10 +746,10 @@ static int end_loose_object_common(struct odb_source *source, return Z_OK; } -static int write_loose_object(struct odb_source *source, - const struct object_id *oid, char *hdr, - int hdrlen, const void *buf, unsigned long len, - time_t mtime, unsigned flags) +int write_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, char *hdr, + int hdrlen, const void *buf, unsigned long len, + time_t mtime, unsigned flags) { int fd, ret; unsigned char compressed[4096]; @@ -1005,11 +760,11 @@ static int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_transaction(source->odb->transaction); + prepare_loose_object_transaction(loose->base.odb->transaction); - 
odb_loose_path(source, &filename, oid); + odb_loose_path(loose, &filename, oid); - fd = start_loose_object_common(source, &tmp_file, filename.buf, flags, + fd = start_loose_object_common(loose, &tmp_file, filename.buf, flags, &stream, compressed, sizeof(compressed), &c, NULL, hdr, hdrlen); if (fd < 0) @@ -1021,14 +776,14 @@ static int write_loose_object(struct odb_source *source, do { unsigned char *in0 = stream.next_in; - ret = write_loose_object_common(source, &c, NULL, &stream, 1, in0, fd, + ret = write_loose_object_common(loose, &c, NULL, &stream, 1, in0, fd, compressed, sizeof(compressed)); } while (ret == Z_OK); if (ret != Z_STREAM_END) die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid), ret); - ret = end_loose_object_common(source, &c, NULL, &stream, &parano_oid, NULL); + ret = end_loose_object_common(loose, &c, NULL, &stream, &parano_oid, NULL); if (ret != Z_OK) die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid), ret); @@ -1036,7 +791,7 @@ static int write_loose_object(struct odb_source *source, die(_("confused by unstable object source data for %s"), oid_to_hex(oid)); - close_loose_object(source, fd, tmp_file.buf); + close_loose_object(loose, fd, tmp_file.buf); if (mtime) { struct utimbuf utb; @@ -1047,21 +802,15 @@ static int write_loose_object(struct odb_source *source, warning_errno(_("failed utime() on %s"), tmp_file.buf); } - return finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf, + return finalize_object_file_flags(loose->base.odb->repo, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); } -int odb_source_loose_freshen_object(struct odb_source *source, - const struct object_id *oid) -{ - return !!check_and_freshen_source(source, oid, 1); -} - -int odb_source_loose_write_stream(struct odb_source *source, +int odb_source_loose_write_stream(struct odb_source_loose *loose, struct odb_write_stream *in_stream, size_t len, struct object_id *oid) { - const struct git_hash_algo *compat = 
source->odb->repo->compat_hash_algo; + const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo; struct object_id compat_oid; int fd, ret, err = 0, flush = 0; unsigned char compressed[4096]; @@ -1075,10 +824,10 @@ int odb_source_loose_write_stream(struct odb_source *source, int hdrlen; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_transaction(source->odb->transaction); + prepare_loose_object_transaction(loose->base.odb->transaction); /* Since oid is not determined, save tmp file to odb path. */ - strbuf_addf(&filename, "%s/", source->path); + strbuf_addf(&filename, "%s/", loose->base.path); hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len); /* @@ -1089,7 +838,7 @@ int odb_source_loose_write_stream(struct odb_source *source, * - Setup zlib stream for compression. * - Start to feed header to zlib stream. */ - fd = start_loose_object_common(source, &tmp_file, filename.buf, 0, + fd = start_loose_object_common(loose, &tmp_file, filename.buf, 0, &stream, compressed, sizeof(compressed), &c, &compat_c, hdr, hdrlen); if (fd < 0) { @@ -1117,7 +866,7 @@ int odb_source_loose_write_stream(struct odb_source *source, if (in_stream->is_finished) flush = 1; } - ret = write_loose_object_common(source, &c, &compat_c, &stream, flush, in0, fd, + ret = write_loose_object_common(loose, &c, &compat_c, &stream, flush, in0, fd, compressed, sizeof(compressed)); /* * Unlike write_loose_object(), we do not have the entire @@ -1140,16 +889,16 @@ int odb_source_loose_write_stream(struct odb_source *source, */ if (ret != Z_STREAM_END) die(_("unable to stream deflate new object (%d)"), ret); - ret = end_loose_object_common(source, &c, &compat_c, &stream, oid, &compat_oid); + ret = end_loose_object_common(loose, &c, &compat_c, &stream, oid, &compat_oid); if (ret != Z_OK) die(_("deflateEnd on stream object failed (%d)"), ret); - close_loose_object(source, fd, tmp_file.buf); + close_loose_object(loose, fd, tmp_file.buf); - if 
(odb_freshen_object(source->odb, oid)) { + if (odb_freshen_object(loose->base.odb, oid)) { unlink_or_warn(tmp_file.buf); goto cleanup; } - odb_loose_path(source, &filename, oid); + odb_loose_path(loose, &filename, oid); /* We finally know the object path, and create the missing dir. */ dirlen = directory_size(filename.buf); @@ -1157,7 +906,7 @@ int odb_source_loose_write_stream(struct odb_source *source, struct strbuf dir = STRBUF_INIT; strbuf_add(&dir, filename.buf, dirlen); - if (safe_create_dir_in_gitdir(source->odb->repo, dir.buf) && + if (safe_create_dir_in_gitdir(loose->base.odb->repo, dir.buf) && errno != EEXIST) { err = error_errno(_("unable to create directory %s"), dir.buf); strbuf_release(&dir); @@ -1166,60 +915,20 @@ int odb_source_loose_write_stream(struct odb_source *source, strbuf_release(&dir); } - err = finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf, + err = finalize_object_file_flags(loose->base.odb->repo, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); if (!err && compat) - err = repo_add_loose_object_map(source, oid, &compat_oid); + err = repo_add_loose_object_map(loose, oid, &compat_oid); cleanup: strbuf_release(&tmp_file); strbuf_release(&filename); return err; } -int odb_source_loose_write_object(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, - enum odb_write_object_flags flags) -{ - const struct git_hash_algo *algo = source->odb->repo->hash_algo; - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; - struct object_id compat_oid; - char hdr[MAX_HEADER_LEN]; - int hdrlen = sizeof(hdr); - - /* Generate compat_oid */ - if (compat) { - if (compat_oid_in) - oidcpy(&compat_oid, compat_oid_in); - else if (type == OBJ_BLOB) - hash_object_file(compat, buf, len, type, &compat_oid); - else { - struct strbuf converted = STRBUF_INIT; - convert_object_file(source->odb->repo, &converted, algo, compat, - 
buf, len, type, 0); - hash_object_file(compat, converted.buf, converted.len, - type, &compat_oid); - strbuf_release(&converted); - } - } - - /* Normally if we have it in the pack then we do not bother writing - * it out into .git/objects/??/?{38} file. - */ - write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); - if (odb_freshen_object(source->odb, oid)) - return 0; - if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) - return -1; - if (compat) - return repo_add_loose_object_map(source, oid, &compat_oid); - return 0; -} - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime) { + struct odb_source_files *files = odb_source_files_downcast(source); const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; void *buf; unsigned long len; @@ -1230,9 +939,11 @@ int force_object_loose(struct odb_source *source, int hdrlen; int ret; - for (struct odb_source *s = source->odb->sources; s; s = s->next) - if (odb_source_loose_has_object(s, oid)) + for (struct odb_source *s = source->odb->sources; s; s = s->next) { + struct odb_source_files *files = odb_source_files_downcast(s); + if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0)) return 0; + } oi.typep = &type; oi.sizep = &len; @@ -1245,9 +956,9 @@ int force_object_loose(struct odb_source *source, oid_to_hex(oid), compat->name); } hdrlen = format_object_header(hdr, sizeof(hdr), type, len); - ret = write_loose_object(source, oid, hdr, hdrlen, buf, len, mtime, 0); + ret = write_loose_object(files->loose, oid, hdr, hdrlen, buf, len, mtime, 0); if (!ret && compat) - ret = repo_add_loose_object_map(source, oid, &compat_oid); + ret = repo_add_loose_object_map(files->loose, oid, &compat_oid); free(buf); return ret; @@ -1741,13 +1452,13 @@ int read_pack_header(int fd, struct pack_header *header) return 0; } -static int for_each_file_in_obj_subdir(unsigned int subdir_nr, - struct strbuf *path, - const struct git_hash_algo *algop, - 
each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data) +int for_each_file_in_obj_subdir(unsigned int subdir_nr, + struct strbuf *path, + const struct git_hash_algo *algop, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) { size_t origlen, baselen; DIR *dir; @@ -1832,229 +1543,6 @@ int for_each_loose_file_in_source(struct odb_source *source, return r; } -struct for_each_object_wrapper_data { - struct odb_source *source; - const struct object_info *request; - odb_for_each_object_cb cb; - void *cb_data; -}; - -static int for_each_object_wrapper_cb(const struct object_id *oid, - const char *path, - void *cb_data) -{ - struct for_each_object_wrapper_data *data = cb_data; - - if (data->request) { - struct object_info oi = *data->request; - - if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0) - return -1; - - return data->cb(oid, &oi, data->cb_data); - } else { - return data->cb(oid, NULL, data->cb_data); - } -} - -static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, - void *node_data UNUSED, - void *cb_data) -{ - struct for_each_object_wrapper_data *data = cb_data; - if (data->request) { - struct object_info oi = *data->request; - - if (odb_source_loose_read_object_info(data->source, - oid, &oi, 0) < 0) - return -1; - - return data->cb(oid, &oi, data->cb_data); - } else { - return data->cb(oid, NULL, data->cb_data); - } -} - -int odb_source_loose_for_each_object(struct odb_source *source, - const struct object_info *request, - odb_for_each_object_cb cb, - void *cb_data, - const struct odb_for_each_object_options *opts) -{ - struct for_each_object_wrapper_data data = { - .source = source, - .request = request, - .cb = cb, - .cb_data = cb_data, - }; - - /* There are no loose promisor objects, so we can return immediately. 
*/ - if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) - return 0; - if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) - return 0; - - if (opts->prefix) - return oidtree_each(odb_source_loose_cache(source, opts->prefix), - opts->prefix, opts->prefix_hex_len, - for_each_prefixed_object_wrapper_cb, &data); - - return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, - NULL, NULL, &data); -} - -static int count_loose_object(const struct object_id *oid UNUSED, - struct object_info *oi UNUSED, - void *payload) -{ - unsigned long *count = payload; - (*count)++; - return 0; -} - -int odb_source_loose_count_objects(struct odb_source *source, - enum odb_count_objects_flags flags, - unsigned long *out) -{ - const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; - char *path = NULL; - DIR *dir = NULL; - int ret; - - if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) { - unsigned long count = 0; - struct dirent *ent; - - path = xstrfmt("%s/17", source->path); - - dir = opendir(path); - if (!dir) { - if (errno == ENOENT) { - *out = 0; - ret = 0; - goto out; - } - - ret = error_errno("cannot open object shard '%s'", path); - goto out; - } - - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz || - ent->d_name[hexsz] != '\0') - continue; - count++; - } - - *out = count * 256; - ret = 0; - } else { - struct odb_for_each_object_options opts = { 0 }; - *out = 0; - ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, &opts); - } - -out: - if (dir) - closedir(dir); - free(path); - return ret; -} - -struct find_abbrev_len_data { - const struct object_id *oid; - unsigned len; -}; - -static int find_abbrev_len_cb(const struct object_id *oid, - struct object_info *oi UNUSED, - void *cb_data) -{ - struct find_abbrev_len_data *data = cb_data; - unsigned len = oid_common_prefix_hexlen(oid, data->oid); - if (len != hash_algos[oid->algo].hexsz && len >= data->len) - data->len = len + 
1; - return 0; -} - -int odb_source_loose_find_abbrev_len(struct odb_source *source, - const struct object_id *oid, - unsigned min_len, - unsigned *out) -{ - struct odb_for_each_object_options opts = { - .prefix = oid, - .prefix_hex_len = min_len, - }; - struct find_abbrev_len_data data = { - .oid = oid, - .len = min_len, - }; - int ret; - - ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb, - &data, &opts); - *out = data.len; - - return ret; -} - -static int append_loose_object(const struct object_id *oid, - const char *path UNUSED, - void *data) -{ - oidtree_insert(data, oid, NULL); - return 0; -} - -static struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - int subdir_nr = oid->hash[0]; - struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(files->loose->subdir_seen[0]); - size_t word_index = subdir_nr / word_bits; - size_t mask = (size_t)1u << (subdir_nr % word_bits); - uint32_t *bitmap; - - if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(files->loose->subdir_seen)) - BUG("subdir_nr out of range"); - - bitmap = &files->loose->subdir_seen[word_index]; - if (*bitmap & mask) - return files->loose->cache; - if (!files->loose->cache) { - ALLOC_ARRAY(files->loose->cache, 1); - oidtree_init(files->loose->cache); - } - strbuf_addstr(&buf, source->path); - for_each_file_in_obj_subdir(subdir_nr, &buf, - source->odb->repo->hash_algo, - append_loose_object, - NULL, NULL, - files->loose->cache); - *bitmap |= mask; - strbuf_release(&buf); - return files->loose->cache; -} - -static void odb_source_loose_clear_cache(struct odb_source_loose *loose) -{ - oidtree_clear(loose->cache); - FREE_AND_NULL(loose->cache); - memset(&loose->subdir_seen, 0, - sizeof(loose->subdir_seen)); -} - -void odb_source_loose_reprepare(struct odb_source *source) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - 
odb_source_loose_clear_cache(files->loose); -} - static int check_stream_oid(git_zstream *stream, const char *hdr, unsigned long size, @@ -2204,155 +1692,3 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source) return &transaction->base; } - -struct odb_source_loose *odb_source_loose_new(struct odb_source *source) -{ - struct odb_source_loose *loose; - CALLOC_ARRAY(loose, 1); - loose->source = source; - return loose; -} - -void odb_source_loose_free(struct odb_source_loose *loose) -{ - if (!loose) - return; - odb_source_loose_clear_cache(loose); - loose_object_map_clear(&loose->map); - free(loose); -} - -struct odb_loose_read_stream { - struct odb_read_stream base; - git_zstream z; - enum { - ODB_LOOSE_READ_STREAM_INUSE, - ODB_LOOSE_READ_STREAM_DONE, - ODB_LOOSE_READ_STREAM_ERROR, - } z_state; - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; -}; - -static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) -{ - struct odb_loose_read_stream *st = - container_of(_st, struct odb_loose_read_stream, base); - size_t total_read = 0; - - switch (st->z_state) { - case ODB_LOOSE_READ_STREAM_DONE: - return 0; - case ODB_LOOSE_READ_STREAM_ERROR: - return -1; - default: - break; - } - - if (st->hdr_used < st->hdr_avail) { - size_t to_copy = st->hdr_avail - st->hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->hdr + st->hdr_used, to_copy); - st->hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_DONE; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = 
ODB_LOOSE_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int close_istream_loose(struct odb_read_stream *_st) -{ - struct odb_loose_read_stream *st = - container_of(_st, struct odb_loose_read_stream, base); - - if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - return 0; -} - -int odb_source_loose_read_object_stream(struct odb_read_stream **out, - struct odb_source *source, - const struct object_id *oid) -{ - struct object_info oi = OBJECT_INFO_INIT; - struct odb_loose_read_stream *st; - unsigned long mapsize; - unsigned long size_ul; - void *mapped; - - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (!mapped) - return -1; - - /* - * Note: we must allocate this structure early even though we may still - * fail. This is because we need to initialize the zlib stream, and it - * is not possible to copy the stream around after the fact because it - * has self-referencing pointers. - */ - CALLOC_ARRAY(st, 1); - - switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, - sizeof(st->hdr))) { - case ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - - /* - * object_info.sizep is unsigned long* (32-bit on Windows), but - * st->base.size is size_t (64-bit). Use temporary variable. - * Note: loose objects >4GB would still truncate here, but such - * large loose objects are uncommon (they'd normally be packed). 
- */ - oi.sizep = &size_ul; - oi.typep = &st->base.type; - - if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) - goto error; - st->base.size = size_ul; - - st->mapped = mapped; - st->mapsize = mapsize; - st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->z.total_out; - st->z_state = ODB_LOOSE_READ_STREAM_INUSE; - st->base.close = close_istream_loose; - st->base.read = read_istream_loose; - - *out = &st->base; - - return 0; -error: - git_inflate_end(&st->z); - munmap(mapped, mapsize); - free(st); - return -1; -} diff --git a/object-file.h b/object-file.h index 5241b8dd5c..528c4e6e69 100644 --- a/object-file.h +++ b/object-file.h @@ -4,6 +4,10 @@ #include "git-zlib.h" #include "object.h" #include "odb.h" +#include "odb/source-loose.h" + +/* The maximum size for an object header. */ +#define MAX_HEADER_LEN 32 struct index_state; @@ -17,61 +21,19 @@ int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct s int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags); struct object_info; -struct odb_read_stream; struct odb_source; -struct odb_source_loose { - struct odb_source *source; - - /* - * Used to store the results of readdir(3) calls when we are OK - * sacrificing accuracy due to races for speed. That includes - * object existence with OBJECT_INFO_QUICK, as well as - * our search for unique abbreviated hashes. Don't use it for tasks - * requiring greater accuracy! - * - * Be sure to call odb_load_loose_cache() before using. - */ - uint32_t subdir_seen[8]; /* 256 bits */ - struct oidtree *cache; - - /* Map between object IDs for loose objects. */ - struct loose_object_map *map; -}; - -struct odb_source_loose *odb_source_loose_new(struct odb_source *source); -void odb_source_loose_free(struct odb_source_loose *loose); - -/* Reprepare the loose source by emptying the loose object cache. 
*/ -void odb_source_loose_reprepare(struct odb_source *source); - -int odb_source_loose_read_object_info(struct odb_source *source, - const struct object_id *oid, - struct object_info *oi, - enum object_info_flags flags); - -int odb_source_loose_read_object_stream(struct odb_read_stream **out, - struct odb_source *source, - const struct object_id *oid); - /* - * Return true iff an object database source has a loose object - * with the specified name. This function does not respect replace - * references. + * Write the given stream into the loose object source. The only difference + * from the generic implementation of this function is that we don't perform an + * object existence check here. + * + * TODO: We should stop exposing this function altogether and move it into + * "odb/source-loose.c". This requires a couple of refactorings though to make + * `force_object_loose()` generic and is thus postponed to a later point in + * time. */ -int odb_source_loose_has_object(struct odb_source *source, - const struct object_id *oid); - -int odb_source_loose_freshen_object(struct odb_source *source, - const struct object_id *oid); - -int odb_source_loose_write_object(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, - enum odb_write_object_flags flags); - -int odb_source_loose_write_stream(struct odb_source *source, +int odb_source_loose_write_stream(struct odb_source_loose *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); @@ -79,7 +41,7 @@ int odb_source_loose_write_stream(struct odb_source *source, * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. 
*/ -const char *odb_loose_path(struct odb_source *source, +const char *odb_loose_path(struct odb_source_loose *source, struct strbuf *buf, const struct object_id *oid); @@ -119,45 +81,13 @@ int for_each_loose_file_in_source(struct odb_source *source, each_loose_cruft_fn cruft_cb, each_loose_subdir_fn subdir_cb, void *data); - -/* - * Iterate through all loose objects in the given object database source and - * invoke the callback function for each of them. If an object info request is - * given, then the object info will be read for every individual object and - * passed to the callback as if `odb_source_loose_read_object_info()` was - * called for the object. - */ -int odb_source_loose_for_each_object(struct odb_source *source, - const struct object_info *request, - odb_for_each_object_cb cb, - void *cb_data, - const struct odb_for_each_object_options *opts); - -/* - * Count the number of loose objects in this source. - * - * The object count is approximated by opening a single sharding directory for - * loose objects and scanning its contents. The result is then extrapolated by - * 256. This should generally work as a reasonable estimate given that the - * object hash is supposed to be indistinguishable from random. - * - * Returns 0 on success, a negative error code otherwise. - */ -int odb_source_loose_count_objects(struct odb_source *source, - enum odb_count_objects_flags flags, - unsigned long *out); - -/* - * Find the shortest unique prefix for the given object ID, where `min_len` is - * the minimum length that the prefix should have. - * - * Returns 0 on success, in which case the computed length will be written to - * `out`. Otherwise, a negative error code is returned. 
- */ -int odb_source_loose_find_abbrev_len(struct odb_source *source, - const struct object_id *oid, - unsigned min_len, - unsigned *out); +int for_each_file_in_obj_subdir(unsigned int subdir_nr, + struct strbuf *path, + const struct git_hash_algo *algop, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); /** * format_object_header() is a thin wrapper around s xsnprintf() that @@ -203,6 +133,14 @@ int finalize_object_file_flags(struct repository *repo, void hash_object_file(const struct git_hash_algo *algo, const void *buf, unsigned long len, enum object_type type, struct object_id *oid); +void write_object_file_prepare(const struct git_hash_algo *algo, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + char *hdr, int *hdrlen); +int write_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, char *hdr, + int hdrlen, const void *buf, unsigned long len, + time_t mtime, unsigned flags); /* Helper to check and "touch" a file */ int check_and_freshen_file(const char *fn, int freshen); @@ -222,6 +160,35 @@ int read_loose_object(struct repository *repo, void **contents, struct object_info *oi); +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. 
+ */ +enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz); +void *unpack_loose_rest(git_zstream *stream, + void *buffer, unsigned long size, + const struct object_id *oid); + +int parse_loose_header(const char *hdr, struct object_info *oi); + struct odb_transaction; /* diff --git a/odb/source-files.c b/odb/source-files.c index b5abd20e97..5bdd042922 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -7,6 +7,7 @@ #include "odb.h" #include "odb/source.h" #include "odb/source-files.h" +#include "odb/source-loose.h" #include "packfile.h" #include "strbuf.h" #include "write-or-die.h" @@ -27,7 +28,7 @@ static void odb_source_files_free(struct odb_source *source) { struct odb_source_files *files = odb_source_files_downcast(source); chdir_notify_unregister(NULL, odb_source_files_reparent, files); - odb_source_loose_free(files->loose); + odb_source_free(&files->loose->base); packfile_store_free(files->packed); odb_source_release(&files->base); free(files); @@ -36,13 +37,14 @@ static void odb_source_files_free(struct odb_source *source) static void odb_source_files_close(struct odb_source *source) { struct odb_source_files *files = odb_source_files_downcast(source); + odb_source_close(&files->loose->base); packfile_store_close(files->packed); } static void odb_source_files_reprepare(struct odb_source *source) { struct odb_source_files *files = odb_source_files_downcast(source); - odb_source_loose_reprepare(&files->base); + odb_source_reprepare(&files->loose->base); packfile_store_reprepare(files->packed); } @@ -54,7 +56,7 @@ static int odb_source_files_read_object_info(struct odb_source *source, struct odb_source_files *files = odb_source_files_downcast(source); if (!packfile_store_read_object_info(files->packed, oid, oi, flags) || - !odb_source_loose_read_object_info(source, oid, oi, flags)) + !odb_source_read_object_info(&files->loose->base, oid, oi, flags)) 
return 0; return -1; @@ -66,7 +68,7 @@ static int odb_source_files_read_object_stream(struct odb_read_stream **out, { struct odb_source_files *files = odb_source_files_downcast(source); if (!packfile_store_read_object_stream(out, files->packed, oid) || - !odb_source_loose_read_object_stream(out, source, oid)) + !odb_source_read_object_stream(out, &files->loose->base, oid)) return 0; return -1; } @@ -81,7 +83,7 @@ static int odb_source_files_for_each_object(struct odb_source *source, int ret; if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { - ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts); + ret = odb_source_for_each_object(&files->loose->base, request, cb, cb_data, opts); if (ret) return ret; } @@ -108,7 +110,7 @@ static int odb_source_files_count_objects(struct odb_source *source, if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) { unsigned long loose_count; - ret = odb_source_loose_count_objects(source, flags, &loose_count); + ret = odb_source_count_objects(&files->loose->base, flags, &loose_count); if (ret < 0) goto out; @@ -135,7 +137,7 @@ static int odb_source_files_find_abbrev_len(struct odb_source *source, if (ret < 0) goto out; - ret = odb_source_loose_find_abbrev_len(source, oid, len, &len); + ret = odb_source_find_abbrev_len(&files->loose->base, oid, len, &len); if (ret < 0) goto out; @@ -151,7 +153,7 @@ static int odb_source_files_freshen_object(struct odb_source *source, { struct odb_source_files *files = odb_source_files_downcast(source); if (packfile_store_freshen_object(files->packed, oid) || - odb_source_loose_freshen_object(source, oid)) + odb_source_freshen_object(&files->loose->base, oid)) return 1; return 0; } @@ -163,8 +165,9 @@ static int odb_source_files_write_object(struct odb_source *source, struct object_id *compat_oid, enum odb_write_object_flags flags) { - return odb_source_loose_write_object(source, buf, len, type, - oid, compat_oid, flags); + struct odb_source_files *files = 
odb_source_files_downcast(source); + return odb_source_write_object(&files->loose->base, buf, len, type, + oid, compat_oid, flags); } static int odb_source_files_write_object_stream(struct odb_source *source, @@ -172,7 +175,8 @@ static int odb_source_files_write_object_stream(struct odb_source *source, size_t len, struct object_id *oid) { - return odb_source_loose_write_stream(source, stream, len, oid); + struct odb_source_files *files = odb_source_files_downcast(source); + return odb_source_write_object_stream(&files->loose->base, stream, len, oid); } static int odb_source_files_begin_transaction(struct odb_source *source, @@ -264,7 +268,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, CALLOC_ARRAY(files, 1); odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local); - files->loose = odb_source_loose_new(&files->base); + files->loose = odb_source_loose_new(odb, path, local); files->packed = packfile_store_new(&files->base); files->base.free = odb_source_files_free; diff --git a/odb/source-loose.c b/odb/source-loose.c new file mode 100644 index 0000000000..7d7ea2fb84 --- /dev/null +++ b/odb/source-loose.c @@ -0,0 +1,736 @@ +#include "git-compat-util.h" +#include "abspath.h" +#include "chdir-notify.h" +#include "gettext.h" +#include "hex.h" +#include "loose.h" +#include "object-file.h" +#include "object-file-convert.h" +#include "odb.h" +#include "odb/source-files.h" +#include "odb/source-loose.h" +#include "odb/streaming.h" +#include "oidtree.h" +#include "repository.h" +#include "strbuf.h" + +static int append_loose_object(const struct object_id *oid, + const char *path UNUSED, + void *data) +{ + oidtree_insert(data, oid, NULL); + return 0; +} + +static struct oidtree *odb_source_loose_cache(struct odb_source_loose *loose, + const struct object_id *oid) +{ + int subdir_nr = oid->hash[0]; + struct strbuf buf = STRBUF_INIT; + size_t word_bits = bitsizeof(loose->subdir_seen[0]); + size_t word_index = subdir_nr / word_bits; + 
size_t mask = (size_t)1u << (subdir_nr % word_bits); + uint32_t *bitmap; + + if (subdir_nr < 0 || + (size_t) subdir_nr >= bitsizeof(loose->subdir_seen)) + BUG("subdir_nr out of range"); + + bitmap = &loose->subdir_seen[word_index]; + if (*bitmap & mask) + return loose->cache; + if (!loose->cache) { + ALLOC_ARRAY(loose->cache, 1); + oidtree_init(loose->cache); + } + strbuf_addstr(&buf, loose->base.path); + for_each_file_in_obj_subdir(subdir_nr, &buf, + loose->base.odb->repo->hash_algo, + append_loose_object, + NULL, NULL, + loose->cache); + *bitmap |= mask; + strbuf_release(&buf); + return loose->cache; +} + +static int quick_has_loose(struct odb_source_loose *loose, + const struct object_id *oid) +{ + return !!oidtree_contains(odb_source_loose_cache(loose, oid), oid); +} + +static int read_object_info_from_path(struct odb_source_loose *loose, + const char *path, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) +{ + int ret; + int fd; + unsigned long mapsize; + void *map = NULL; + git_zstream stream, *stream_to_end = NULL; + char hdr[MAX_HEADER_LEN]; + unsigned long size_scratch; + enum object_type type_scratch; + struct stat st; + + /* + * If we don't care about type or size, then we don't + * need to look inside the object at all. Note that we + * do not optimize out the stat call, even if the + * caller doesn't care about the disk-size, since our + * return value implicitly indicates whether the + * object even exists. + */ + if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) { + struct stat st; + + if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) { + ret = quick_has_loose(loose, oid) ? 
0 : -1; + goto out; + } + + if (lstat(path, &st) < 0) { + ret = -1; + goto out; + } + + if (oi) { + if (oi->disk_sizep) + *oi->disk_sizep = st.st_size; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; + } + + ret = 0; + goto out; + } + + fd = git_open(path); + if (fd < 0) { + if (errno != ENOENT) + error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); + ret = -1; + goto out; + } + + if (fstat(fd, &st)) { + close(fd); + ret = -1; + goto out; + } + + mapsize = xsize_t(st.st_size); + if (!mapsize) { + close(fd); + ret = error(_("object file %s is empty"), path); + goto out; + } + + map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (!map) { + ret = -1; + goto out; + } + + if (oi->disk_sizep) + *oi->disk_sizep = mapsize; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; + + stream_to_end = &stream; + + switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) { + case ULHR_OK: + if (!oi->sizep) + oi->sizep = &size_scratch; + if (!oi->typep) + oi->typep = &type_scratch; + + if (parse_loose_header(hdr, oi) < 0) { + ret = error(_("unable to parse %s header"), oid_to_hex(oid)); + goto corrupt; + } + + if (*oi->typep < 0) + die(_("invalid object type")); + + if (oi->contentp) { + *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); + if (!*oi->contentp) { + ret = -1; + goto corrupt; + } + } + + break; + case ULHR_BAD: + ret = error(_("unable to unpack %s header"), + oid_to_hex(oid)); + goto corrupt; + case ULHR_TOO_LONG: + ret = error(_("header for %s too long, exceeds %d bytes"), + oid_to_hex(oid), MAX_HEADER_LEN); + goto corrupt; + } + + ret = 0; + +corrupt: + if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT)) + die(_("loose object %s (stored in %s) is corrupt"), + oid_to_hex(oid), path); + +out: + if (stream_to_end) + git_inflate_end(stream_to_end); + if (map) + munmap(map, mapsize); + if (oi) { + if (oi->sizep == &size_scratch) + oi->sizep = NULL; + if (oi->typep == &type_scratch) + oi->typep = NULL; + if 
(oi->delta_base_oid) + oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo); + if (!ret) + oi->whence = OI_LOOSE; + } + + return ret; +} + +static int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + static struct strbuf buf = STRBUF_INIT; + + /* + * The second read shouldn't cause new loose objects to show up, unless + * there was a race condition with a secondary process. We don't care + * about this case though, so we simply skip reading loose objects a + * second time. + */ + if (flags & OBJECT_INFO_SECOND_READ) + return -1; + + odb_loose_path(loose, &buf, oid); + return read_object_info_from_path(loose, buf.buf, oid, oi, flags); +} + +/* + * Find "oid" as a loose object in given source, open the object and return its + * file descriptor. Returns the file descriptor on success, negative on failure. + * + * The "path" out-parameter will give the path of the object we found (if any). + * Note that it may point to static storage and is only valid until another + * call to open_loose_object(). 
+ */ +static int open_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, const char **path) +{ + static struct strbuf buf = STRBUF_INIT; + int fd; + + *path = odb_loose_path(loose, &buf, oid); + fd = git_open(*path); + if (fd >= 0) + return fd; + + return -1; +} + +static void *odb_source_loose_map_object(struct odb_source_loose *loose, + const struct object_id *oid, + unsigned long *size) +{ + const char *p; + int fd = open_loose_object(loose, oid, &p); + void *map = NULL; + struct stat st; + + if (fd < 0) + return NULL; + + if (!fstat(fd, &st)) { + *size = xsize_t(st.st_size); + if (!*size) { + /* mmap() is forbidden on empty files */ + error(_("object file %s is empty"), p); + goto out; + } + + map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); + } + +out: + close(fd); + return map; +} + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = + container_of(_st, struct odb_loose_read_stream, base); + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + 
git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = + container_of(_st, struct odb_loose_read_stream, base); + + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +static int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + unsigned long size_ul; + void *mapped; + + mapped = odb_source_loose_map_object(loose, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + /* + * object_info.sizep is unsigned long* (32-bit on Windows), but + * st->base.size is size_t (64-bit). Use temporary variable. + * Note: loose objects >4GB would still truncate here, but such + * large loose objects are uncommon (they'd normally be packed). 
+ */ + oi.sizep = &size_ul; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + st->base.size = size_ul; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(mapped, mapsize); + free(st); + return -1; +} + +struct for_each_object_wrapper_data { + struct odb_source_loose *loose; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int for_each_object_wrapper_cb(const struct object_id *oid, + const char *path, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + + if (data->request) { + struct object_info oi = *data->request; + + if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *node_data UNUSED, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->request) { + struct object_info oi = *data->request; + + if (odb_source_read_object_info(&data->loose->base, + oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +static int odb_source_loose_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + struct for_each_object_wrapper_data data = { + .loose = loose, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + + /* There are no loose 
promisor objects, so we can return immediately. */ + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + return 0; + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + return 0; + + if (opts->prefix) + return oidtree_each(odb_source_loose_cache(loose, opts->prefix), + opts->prefix, opts->prefix_hex_len, + for_each_prefixed_object_wrapper_cb, &data); + + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, + NULL, NULL, &data); +} + /* Payload for find_abbrev_len_cb(): the object ID being abbreviated and the abbreviation length required so far. */ +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + /* Per-object callback for the abbreviation search. len is the result of oid_common_prefix_hexlen() for the visited ID and data->oid (presumably the count of shared leading hex digits -- confirm against its definition). Unless len equals the full hex size of the hash algorithm, data->len is raised to len + 1 whenever len is at least the current data->len. Always returns 0. */ +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + /* Runs find_abbrev_len_cb() over this source with oid and min_len passed as the iteration prefix, starting from a length of min_len. The resulting length is stored in *out even when the iteration reports an error; the return value is whatever odb_source_for_each_object() returned. */ +static int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_for_each_object(&loose->base, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + /* Callback that increments the unsigned long counter passed as payload and returns 0. */ +static int count_loose_object(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *payload) +{ + unsigned long *count = payload; + (*count)++; + return 0; +} + /* Counts the loose objects of this source into *out. With ODB_COUNT_OBJECTS_APPROXIMATE only the 17 subdirectory of source->path is read: entries made of exactly hexsz lowercase hex digits are counted and the count is multiplied by 256; a missing directory (ENOENT) yields 0, any other opendir() failure is reported through error_errno(). Without the flag every object is visited through odb_source_for_each_object() with count_loose_object(). */ +static int odb_source_loose_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + /* expected file name length: full hex size minus two digits (the directory name 17 used below is two characters long) */ const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; + char *path = NULL; + DIR *dir = NULL; + int ret; + + if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) { + unsigned long count = 0; 
+ struct dirent *ent; + + path = xstrfmt("%s/17", source->path); + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT) { + *out = 0; + ret = 0; + goto out; + } + + ret = error_errno("cannot open object shard '%s'", path); + goto out; + } + + while ((ent = readdir(dir)) != NULL) { + /* skip anything that is not exactly hexsz lowercase hex digits */ if (strspn(ent->d_name, "0123456789abcdef") != hexsz || + ent->d_name[hexsz] != '\0') + continue; + count++; + } + + /* scale the single-directory count by 256 */ *out = count * 256; + ret = 0; + } else { + struct odb_for_each_object_options opts = { 0 }; + *out = 0; + ret = odb_source_for_each_object(&loose->base, NULL, count_loose_object, + out, &opts); + } + +out: + /* shared cleanup: dir and path are NULL when never set */ if (dir) + closedir(dir); + free(path); + return ret; +} + /* Builds the loose path for oid and returns the result of check_and_freshen_file() normalized to 0 or 1. NOTE(review): path is a function-local static strbuf, so its storage persists between calls; this relies on odb_loose_path() resetting the buffer -- confirm against its definition. */ +static int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + static struct strbuf path = STRBUF_INIT; + odb_loose_path(loose, &path, oid); + return !!check_and_freshen_file(path.buf, 1); +} + /* Writes buf (len bytes, of the given type) as a loose object and stores its ID in oid. When the repository has a compat hash algorithm, the compat ID is taken from compat_oid_in if given, hashed directly from buf for blobs, and otherwise hashed from the output of convert_object_file(). Returns 0 without writing when odb_freshen_object() reports success, -1 when write_loose_object() fails, the result of repo_add_loose_object_map() when a compat algorithm is in use, and 0 otherwise. */ +static int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, + enum odb_write_object_flags flags) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + const struct git_hash_algo *algo = source->odb->repo->hash_algo; + const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; + struct object_id compat_oid; + char hdr[MAX_HEADER_LEN]; + int hdrlen = sizeof(hdr); + + /* Generate compat_oid */ + if (compat) { + if (compat_oid_in) + oidcpy(&compat_oid, compat_oid_in); + else if (type == OBJ_BLOB) + hash_object_file(compat, buf, len, type, &compat_oid); + else { + struct strbuf converted = STRBUF_INIT; + /* NOTE(review): the return value of convert_object_file() is not checked before converted is hashed -- confirm this is intended */ convert_object_file(source->odb->repo, &converted, algo, compat, + buf, len, type, 0); + hash_object_file(compat, converted.buf, converted.len, + type, &compat_oid); + strbuf_release(&converted); + } + } + + /* Normally if we have it in the 
pack then we do not bother writing + * it out into .git/objects/??/?{38} file. + */ + write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); + if (odb_freshen_object(source->odb, oid)) + return 0; + if (write_loose_object(loose, oid, hdr, hdrlen, buf, len, 0, flags)) + return -1; + if (compat) + return repo_add_loose_object_map(loose, oid, &compat_oid); + return 0; +} + /* Stream-writing callback: downcasts the source and forwards all arguments to odb_source_loose_write_stream(). */ +static int odb_source_loose_write_object_stream(struct odb_source *source, + struct odb_write_stream *in_stream, + size_t len, + struct object_id *oid) +{ + /* + * TODO: the implementation should be moved here, see the comment on + * the called function in "object-file.h". + */ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + return odb_source_loose_write_stream(loose, in_stream, len, oid); +} + /* Transactions are not implemented for this backend: both arguments are unused and the result of error() is returned. */ +static int odb_source_loose_begin_transaction(struct odb_source *source UNUSED, + struct odb_transaction **out UNUSED) +{ + /* TODO: this is a known omission that we'll want to address eventually. */ + return error("loose source does not support transactions"); +} + /* Leaves out untouched and returns 0. */ +static int odb_source_loose_read_alternates(struct odb_source *source UNUSED, + struct strvec *out UNUSED) +{ + return 0; +} + /* Always fails: returns the result of error() without using its arguments. */ +static int odb_source_loose_write_alternate(struct odb_source *source UNUSED, + const char *alternate UNUSED) +{ + return error("loose source does not support alternates"); +} + /* Clears and frees the oidtree cache (the pointer is reset to NULL) and zeroes the subdir_seen array. */ +static void odb_source_loose_clear_cache(struct odb_source_loose *loose) +{ + oidtree_clear(loose->cache); + FREE_AND_NULL(loose->cache); + memset(&loose->subdir_seen, 0, + sizeof(loose->subdir_seen)); +} + /* Reprepare callback: drops the cached state through odb_source_loose_clear_cache(). */ +static void odb_source_loose_reprepare(struct odb_source *source) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + odb_source_loose_clear_cache(loose); +} + /* Close callback; intentionally empty. */ +static void odb_source_loose_close(struct odb_source *source UNUSED) +{ + /* Nothing to do. 
*/ +} + /* Callback registered with chdir_notify_register(): replaces loose->base.path with the result of reparent_relative_path(old_cwd, new_cwd, ...) and frees the previous string. cb_data is the struct odb_source_loose passed at registration. */ +static void odb_source_loose_reparent(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *cb_data) +{ + struct odb_source_loose *loose = cb_data; + char *path = reparent_relative_path(old_cwd, new_cwd, + loose->base.path); + free(loose->base.path); + loose->base.path = path; +} + /* Free callback: clears the cache and the object ID map, unregisters the chdir notification, releases the base source and finally frees the structure itself. The unregister call is made unconditionally, while registration only happens for non-absolute paths. */ +static void odb_source_loose_free(struct odb_source *source) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + odb_source_loose_clear_cache(loose); + loose_object_map_clear(&loose->map); + chdir_notify_unregister(NULL, odb_source_loose_reparent, loose); + odb_source_release(&loose->base); + free(loose); +} + /* Allocates a zero-initialized loose source, initializes its base with type ODB_SOURCE_LOOSE for the given odb, path and local flag, installs the callbacks defined in this file, and registers odb_source_loose_reparent() for chdir notifications when the stored path is not absolute. Returns the new source. */ +struct odb_source_loose *odb_source_loose_new(struct object_database *odb, + const char *path, + bool local) +{ + struct odb_source_loose *loose; + + CALLOC_ARRAY(loose, 1); + odb_source_init(&loose->base, odb, ODB_SOURCE_LOOSE, path, local); + + loose->base.free = odb_source_loose_free; + loose->base.close = odb_source_loose_close; + loose->base.reprepare = odb_source_loose_reprepare; + loose->base.read_object_info = odb_source_loose_read_object_info; + loose->base.read_object_stream = odb_source_loose_read_object_stream; + loose->base.for_each_object = odb_source_loose_for_each_object; + loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len; + loose->base.count_objects = odb_source_loose_count_objects; + loose->base.freshen_object = odb_source_loose_freshen_object; + loose->base.write_object = odb_source_loose_write_object; + loose->base.write_object_stream = odb_source_loose_write_object_stream; + loose->base.begin_transaction = odb_source_loose_begin_transaction; + loose->base.read_alternates = odb_source_loose_read_alternates; + loose->base.write_alternate = odb_source_loose_write_alternate; + + /* a relative path has to be rewritten when the working directory changes */ if (!is_absolute_path(loose->base.path)) + chdir_notify_register(NULL, odb_source_loose_reparent, loose); + + return loose; +} diff --git a/odb/source-loose.h b/odb/source-loose.h new file mode 100644 index 
0000000000..6070aaf3ce --- /dev/null +++ b/odb/source-loose.h @@ -0,0 +1,48 @@ +#ifndef ODB_SOURCE_LOOSE_H +#define ODB_SOURCE_LOOSE_H + +#include "odb/source.h" + +struct odb_source_files; /* NOTE(review): not referenced anywhere else in this header -- confirm the forward declaration is still needed */ +struct object_database; +struct oidtree; + +/* + * An object database source that stores its objects in loose format, one + * file per object. + */ +struct odb_source_loose { + struct odb_source base; + + /* + * Used to store the results of readdir(3) calls when we are OK + * sacrificing accuracy due to races for speed. That includes + * object existence with OBJECT_INFO_QUICK, as well as + * our search for unique abbreviated hashes. Don't use it for tasks + * requiring greater accuracy! + * + * Be sure to call odb_load_loose_cache() before using. NOTE(review): the for-each-object code in source-loose.c obtains the cache through odb_source_loose_cache(); confirm which function name is current. + */ + uint32_t subdir_seen[8]; /* 256 bits */ + struct oidtree *cache; + + /* Map between object IDs for loose objects. */ + struct loose_object_map *map; +}; + /* Create a new loose object source for the given object database, path and local flag. */ +struct odb_source_loose *odb_source_loose_new(struct object_database *odb, + const char *path, + bool local); + +/* + * Cast the given object database source to the loose backend. This will cause + * a BUG in case the source doesn't use this backend. + */ +static inline struct odb_source_loose *odb_source_loose_downcast(struct odb_source *source) +{ + if (source->type != ODB_SOURCE_LOOSE) + BUG("trying to downcast source of type '%d' to loose", source->type); + return container_of(source, struct odb_source_loose, base); +} + +#endif diff --git a/odb/source.h b/odb/source.h index 0a440884e4..8bcb67787e 100644 --- a/odb/source.h +++ b/odb/source.h @@ -14,6 +14,9 @@ enum odb_source_type { /* The "files" backend that uses loose objects and packfiles. */ ODB_SOURCE_FILES, + /* The "loose" backend that uses loose objects, only. */ + ODB_SOURCE_LOOSE, + /* The "in-memory" backend that stores objects in memory. */ ODB_SOURCE_INMEMORY, };