Merge branch 'ps/odb-source-loose'

The loose object source has been refactored into a proper `struct
odb_source`.

* ps/odb-source-loose:
  odb/source-loose: drop pointer to the "files" source
  odb/source-loose: stub out remaining callbacks
  odb/source-loose: wire up `write_object_stream()` callback
  object-file: refactor writing objects to use loose source
  odb/source-loose: wire up `write_object()` callback
  loose: refactor object map to operate on `struct odb_source_loose`
  odb/source-loose: wire up `freshen_object()` callback
  odb/source-loose: drop `odb_source_loose_has_object()`
  odb/source-loose: wire up `count_objects()` callback
  odb/source-loose: wire up `find_abbrev_len()` callback
  odb/source-loose: wire up `for_each_object()` callback
  odb/source-loose: wire up `read_object_stream()` callback
  odb/source-loose: wire up `read_object_info()` callback
  odb/source-loose: wire up `close()` callback
  odb/source-loose: wire up `reprepare()` callback
  odb/source-loose: start converting to a proper `struct odb_source`
  odb/source-loose: store pointer to "files" instead of generic source
  odb/source-loose: move loose source into "odb/" subsystem
This commit is contained in:
Junio C Hamano
2026-06-11 04:31:18 -07:00
15 changed files with 973 additions and 870 deletions

View File

@@ -1217,6 +1217,7 @@ LIB_OBJS += odb.o
LIB_OBJS += odb/source.o
LIB_OBJS += odb/source-files.o
LIB_OBJS += odb/source-inmemory.o
LIB_OBJS += odb/source-loose.o
LIB_OBJS += odb/streaming.o
LIB_OBJS += odb/transaction.o
LIB_OBJS += oid-array.o

View File

@@ -890,8 +890,9 @@ static void batch_each_object(struct batch_options *opt,
*/
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next) {
int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
&payload, &opts);
struct odb_source_files *files = odb_source_files_downcast(source);
int ret = odb_source_for_each_object(&files->loose->base, NULL, batch_one_object_oi,
&payload, &opts);
if (ret)
break;
}

View File

@@ -466,6 +466,7 @@ out:
static int too_many_loose_objects(int limit)
{
struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
/*
* This is weird, but stems from legacy behaviour: the GC auto
* threshold was always essentially interpreted as if it was rounded up
@@ -474,9 +475,8 @@ static int too_many_loose_objects(int limit)
int auto_threshold = DIV_ROUND_UP(limit, 256) * 256;
unsigned long loose_count;
if (odb_source_loose_count_objects(the_repository->objects->sources,
ODB_COUNT_OBJECTS_APPROXIMATE,
&loose_count) < 0)
if (odb_source_count_objects(&files->loose->base, ODB_COUNT_OBJECTS_APPROXIMATE,
&loose_count) < 0)
return 0;
return loose_count > auto_threshold;

View File

@@ -1750,9 +1750,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
* skip the local object source.
*/
struct odb_source *source = the_repository->objects->sources->next;
for (; source; source = source->next)
if (odb_source_loose_has_object(source, oid))
for (; source; source = source->next) {
struct odb_source_files *files = odb_source_files_downcast(source);
if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0))
return 0;
}
}
/*
@@ -4135,9 +4137,11 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
struct odb_source *source = the_repository->objects->sources;
int found = 0;
for (; !found && source; source = source->next)
if (odb_source_loose_has_object(source, oid))
for (; !found && source; source = source->next) {
struct odb_source_files *files = odb_source_files_downcast(source);
if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0))
found = 1;
}
/*
* If a traversed tree has a missing blob then we want

View File

@@ -539,8 +539,9 @@ static int fetch_object(struct walker *walker, const struct object_id *oid)
} else if (!oideq(&obj_req->oid, &req->real_oid)) {
ret = error("File %s has bad hash", hex);
} else if (req->rename < 0) {
struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
struct strbuf buf = STRBUF_INIT;
odb_loose_path(the_repository->objects->sources, &buf, &req->oid);
odb_loose_path(files->loose, &buf, &req->oid);
ret = error("unable to write sha1 filename %s", buf.buf);
strbuf_release(&buf);
}

6
http.c
View File

@@ -2826,6 +2826,7 @@ static size_t fwrite_sha1_file(char *ptr, size_t eltsize, size_t nmemb,
struct http_object_request *new_http_object_request(const char *base_url,
const struct object_id *oid)
{
struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
char *hex = oid_to_hex(oid);
struct strbuf filename = STRBUF_INIT;
struct strbuf prevfile = STRBUF_INIT;
@@ -2840,7 +2841,7 @@ struct http_object_request *new_http_object_request(const char *base_url,
oidcpy(&freq->oid, oid);
freq->localfile = -1;
odb_loose_path(the_repository->objects->sources, &filename, oid);
odb_loose_path(files->loose, &filename, oid);
strbuf_addf(&freq->tmpfile, "%s.temp", filename.buf);
strbuf_addf(&prevfile, "%s.prev", filename.buf);
@@ -2966,6 +2967,7 @@ void process_http_object_request(struct http_object_request *freq)
int finish_http_object_request(struct http_object_request *freq)
{
struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
struct stat st;
struct strbuf filename = STRBUF_INIT;
@@ -2992,7 +2994,7 @@ int finish_http_object_request(struct http_object_request *freq)
unlink_or_warn(freq->tmpfile.buf);
return -1;
}
odb_loose_path(the_repository->objects->sources, &filename, &freq->oid);
odb_loose_path(files->loose, &filename, &freq->oid);
freq->rename = finalize_object_file(the_repository, freq->tmpfile.buf, filename.buf);
strbuf_release(&filename);

45
loose.c
View File

@@ -46,38 +46,36 @@ static int insert_oid_pair(kh_oid_map_t *map, const struct object_id *key, const
return 1;
}
static int insert_loose_map(struct odb_source *source,
static int insert_loose_map(struct odb_source_loose *loose,
const struct object_id *oid,
const struct object_id *compat_oid)
{
struct odb_source_files *files = odb_source_files_downcast(source);
struct loose_object_map *map = files->loose->map;
struct loose_object_map *map = loose->map;
int inserted = 0;
inserted |= insert_oid_pair(map->to_compat, oid, compat_oid);
inserted |= insert_oid_pair(map->to_storage, compat_oid, oid);
if (inserted)
oidtree_insert(files->loose->cache, compat_oid, NULL);
oidtree_insert(loose->cache, compat_oid, NULL);
return inserted;
}
static int load_one_loose_object_map(struct repository *repo, struct odb_source *source)
static int load_one_loose_object_map(struct repository *repo, struct odb_source_loose *loose)
{
struct odb_source_files *files = odb_source_files_downcast(source);
struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
FILE *fp;
if (!files->loose->map)
loose_object_map_init(&files->loose->map);
if (!files->loose->cache) {
ALLOC_ARRAY(files->loose->cache, 1);
oidtree_init(files->loose->cache);
if (!loose->map)
loose_object_map_init(&loose->map);
if (!loose->cache) {
ALLOC_ARRAY(loose->cache, 1);
oidtree_init(loose->cache);
}
insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree);
insert_loose_map(source, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob);
insert_loose_map(source, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid);
insert_loose_map(loose, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree);
insert_loose_map(loose, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob);
insert_loose_map(loose, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid);
repo_common_path_replace(repo, &path, "objects/loose-object-idx");
fp = fopen(path.buf, "rb");
@@ -97,7 +95,7 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source
parse_oid_hex_algop(p, &compat_oid, &p, repo->compat_hash_algo) ||
p != buf.buf + buf.len)
goto err;
insert_loose_map(source, &oid, &compat_oid);
insert_loose_map(loose, &oid, &compat_oid);
}
strbuf_release(&buf);
@@ -119,7 +117,8 @@ int repo_read_loose_object_map(struct repository *repo)
odb_prepare_alternates(repo->objects);
for (source = repo->objects->sources; source; source = source->next) {
if (load_one_loose_object_map(repo, source) < 0) {
struct odb_source_files *files = odb_source_files_downcast(source);
if (load_one_loose_object_map(repo, files->loose) < 0) {
return -1;
}
}
@@ -171,7 +170,7 @@ errout:
return -1;
}
static int write_one_object(struct odb_source *source,
static int write_one_object(struct odb_source_loose *loose,
const struct object_id *oid,
const struct object_id *compat_oid)
{
@@ -180,7 +179,7 @@ static int write_one_object(struct odb_source *source,
struct stat st;
struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
strbuf_addf(&path, "%s/loose-object-idx", source->path);
strbuf_addf(&path, "%s/loose-object-idx", loose->base.path);
hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1);
fd = open(path.buf, O_WRONLY | O_CREAT | O_APPEND, 0666);
@@ -196,7 +195,7 @@ static int write_one_object(struct odb_source *source,
goto errout;
if (close(fd))
goto errout;
adjust_shared_perm(source->odb->repo, path.buf);
adjust_shared_perm(loose->base.odb->repo, path.buf);
rollback_lock_file(&lock);
strbuf_release(&buf);
strbuf_release(&path);
@@ -210,18 +209,18 @@ errout:
return -1;
}
int repo_add_loose_object_map(struct odb_source *source,
int repo_add_loose_object_map(struct odb_source_loose *loose,
const struct object_id *oid,
const struct object_id *compat_oid)
{
int inserted = 0;
if (!should_use_loose_object_map(source->odb->repo))
if (!should_use_loose_object_map(loose->base.odb->repo))
return 0;
inserted = insert_loose_map(source, oid, compat_oid);
inserted = insert_loose_map(loose, oid, compat_oid);
if (inserted)
return write_one_object(source, oid, compat_oid);
return write_one_object(loose, oid, compat_oid);
return 0;
}

View File

@@ -4,7 +4,7 @@
#include "khash.h"
struct repository;
struct odb_source;
struct odb_source_loose;
struct loose_object_map {
kh_oid_map_t *to_compat;
@@ -17,7 +17,7 @@ int repo_loose_object_map_oid(struct repository *repo,
const struct object_id *src,
const struct git_hash_algo *dest_algo,
struct object_id *dest);
int repo_add_loose_object_map(struct odb_source *source,
int repo_add_loose_object_map(struct odb_source_loose *loose,
const struct object_id *oid,
const struct object_id *compat_oid);
int repo_read_loose_object_map(struct repository *repo);

View File

@@ -405,6 +405,7 @@ libgit_sources = [
'odb/source.c',
'odb/source-files.c',
'odb/source-inmemory.c',
'odb/source-loose.c',
'odb/streaming.c',
'odb/transaction.c',
'oid-array.c',

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,10 @@
#include "git-zlib.h"
#include "object.h"
#include "odb.h"
#include "odb/source-loose.h"
/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
struct index_state;
@@ -17,61 +21,19 @@ int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct s
int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
struct object_info;
struct odb_read_stream;
struct odb_source;
struct odb_source_loose {
struct odb_source *source;
/*
* Used to store the results of readdir(3) calls when we are OK
* sacrificing accuracy due to races for speed. That includes
* object existence with OBJECT_INFO_QUICK, as well as
* our search for unique abbreviated hashes. Don't use it for tasks
* requiring greater accuracy!
*
* Be sure to call odb_load_loose_cache() before using.
*/
uint32_t subdir_seen[8]; /* 256 bits */
struct oidtree *cache;
/* Map between object IDs for loose objects. */
struct loose_object_map *map;
};
struct odb_source_loose *odb_source_loose_new(struct odb_source *source);
void odb_source_loose_free(struct odb_source_loose *loose);
/* Reprepare the loose source by emptying the loose object cache. */
void odb_source_loose_reprepare(struct odb_source *source);
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
struct object_info *oi,
enum object_info_flags flags);
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
const struct object_id *oid);
/*
* Return true iff an object database source has a loose object
* with the specified name. This function does not respect replace
* references.
* Write the given stream into the loose object source. The only difference
* from the generic implementation of this function is that we don't perform an
* object existence check here.
*
* TODO: We should stop exposing this function altogether and move it into
* "odb/source-loose.c". This requires a couple of refactorings though to make
* `force_object_loose()` generic and is thus postponed to a later point in
* time.
*/
int odb_source_loose_has_object(struct odb_source *source,
const struct object_id *oid);
int odb_source_loose_freshen_object(struct odb_source *source,
const struct object_id *oid);
int odb_source_loose_write_object(struct odb_source *source,
const void *buf, unsigned long len,
enum object_type type, struct object_id *oid,
struct object_id *compat_oid_in,
enum odb_write_object_flags flags);
int odb_source_loose_write_stream(struct odb_source *source,
int odb_source_loose_write_stream(struct odb_source_loose *source,
struct odb_write_stream *stream, size_t len,
struct object_id *oid);
@@ -79,7 +41,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
* Put in `buf` the name of the file in the local object database that
* would be used to store a loose object with the specified oid.
*/
const char *odb_loose_path(struct odb_source *source,
const char *odb_loose_path(struct odb_source_loose *source,
struct strbuf *buf,
const struct object_id *oid);
@@ -119,45 +81,13 @@ int for_each_loose_file_in_source(struct odb_source *source,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data);
/*
* Iterate through all loose objects in the given object database source and
* invoke the callback function for each of them. If an object info request is
* given, then the object info will be read for every individual object and
* passed to the callback as if `odb_source_loose_read_object_info()` was
* called for the object.
*/
int odb_source_loose_for_each_object(struct odb_source *source,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
const struct odb_for_each_object_options *opts);
/*
* Count the number of loose objects in this source.
*
* The object count is approximated by opening a single sharding directory for
* loose objects and scanning its contents. The result is then extrapolated by
* 256. This should generally work as a reasonable estimate given that the
* object hash is supposed to be indistinguishable from random.
*
* Returns 0 on success, a negative error code otherwise.
*/
int odb_source_loose_count_objects(struct odb_source *source,
enum odb_count_objects_flags flags,
unsigned long *out);
/*
* Find the shortest unique prefix for the given object ID, where `min_len` is
* the minimum length that the prefix should have.
*
* Returns 0 on success, in which case the computed length will be written to
* `out`. Otherwise, a negative error code is returned.
*/
int odb_source_loose_find_abbrev_len(struct odb_source *source,
const struct object_id *oid,
unsigned min_len,
unsigned *out);
int for_each_file_in_obj_subdir(unsigned int subdir_nr,
struct strbuf *path,
const struct git_hash_algo *algop,
each_loose_object_fn obj_cb,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data);
/**
* format_object_header() is a thin wrapper around xsnprintf() that
@@ -203,6 +133,14 @@ int finalize_object_file_flags(struct repository *repo,
void hash_object_file(const struct git_hash_algo *algo, const void *buf,
unsigned long len, enum object_type type,
struct object_id *oid);
void write_object_file_prepare(const struct git_hash_algo *algo,
const void *buf, unsigned long len,
enum object_type type, struct object_id *oid,
char *hdr, int *hdrlen);
int write_loose_object(struct odb_source_loose *loose,
const struct object_id *oid, char *hdr,
int hdrlen, const void *buf, unsigned long len,
time_t mtime, unsigned flags);
/* Helper to check and "touch" a file */
int check_and_freshen_file(const char *fn, int freshen);
@@ -222,6 +160,35 @@ int read_loose_object(struct repository *repo,
void **contents,
struct object_info *oi);
enum unpack_loose_header_result {
ULHR_OK,
ULHR_BAD,
ULHR_TOO_LONG,
};
/**
* unpack_loose_header() initializes the data stream needed to unpack
* a loose object header.
*
* Returns:
*
* - ULHR_OK on success
* - ULHR_BAD on error
* - ULHR_TOO_LONG if the header was too long
*
* It will only parse up to MAX_HEADER_LEN bytes.
*/
enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
unsigned char *map,
unsigned long mapsize,
void *buffer,
unsigned long bufsiz);
void *unpack_loose_rest(git_zstream *stream,
void *buffer, unsigned long size,
const struct object_id *oid);
int parse_loose_header(const char *hdr, struct object_info *oi);
struct odb_transaction;
/*

View File

@@ -7,6 +7,7 @@
#include "odb.h"
#include "odb/source.h"
#include "odb/source-files.h"
#include "odb/source-loose.h"
#include "packfile.h"
#include "strbuf.h"
#include "write-or-die.h"
@@ -27,7 +28,7 @@ static void odb_source_files_free(struct odb_source *source)
{
struct odb_source_files *files = odb_source_files_downcast(source);
chdir_notify_unregister(NULL, odb_source_files_reparent, files);
odb_source_loose_free(files->loose);
odb_source_free(&files->loose->base);
packfile_store_free(files->packed);
odb_source_release(&files->base);
free(files);
@@ -36,13 +37,14 @@ static void odb_source_files_free(struct odb_source *source)
static void odb_source_files_close(struct odb_source *source)
{
struct odb_source_files *files = odb_source_files_downcast(source);
odb_source_close(&files->loose->base);
packfile_store_close(files->packed);
}
static void odb_source_files_reprepare(struct odb_source *source)
{
struct odb_source_files *files = odb_source_files_downcast(source);
odb_source_loose_reprepare(&files->base);
odb_source_reprepare(&files->loose->base);
packfile_store_reprepare(files->packed);
}
@@ -54,7 +56,7 @@ static int odb_source_files_read_object_info(struct odb_source *source,
struct odb_source_files *files = odb_source_files_downcast(source);
if (!packfile_store_read_object_info(files->packed, oid, oi, flags) ||
!odb_source_loose_read_object_info(source, oid, oi, flags))
!odb_source_read_object_info(&files->loose->base, oid, oi, flags))
return 0;
return -1;
@@ -66,7 +68,7 @@ static int odb_source_files_read_object_stream(struct odb_read_stream **out,
{
struct odb_source_files *files = odb_source_files_downcast(source);
if (!packfile_store_read_object_stream(out, files->packed, oid) ||
!odb_source_loose_read_object_stream(out, source, oid))
!odb_source_read_object_stream(out, &files->loose->base, oid))
return 0;
return -1;
}
@@ -81,7 +83,7 @@ static int odb_source_files_for_each_object(struct odb_source *source,
int ret;
if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts);
ret = odb_source_for_each_object(&files->loose->base, request, cb, cb_data, opts);
if (ret)
return ret;
}
@@ -108,7 +110,7 @@ static int odb_source_files_count_objects(struct odb_source *source,
if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) {
unsigned long loose_count;
ret = odb_source_loose_count_objects(source, flags, &loose_count);
ret = odb_source_count_objects(&files->loose->base, flags, &loose_count);
if (ret < 0)
goto out;
@@ -135,7 +137,7 @@ static int odb_source_files_find_abbrev_len(struct odb_source *source,
if (ret < 0)
goto out;
ret = odb_source_loose_find_abbrev_len(source, oid, len, &len);
ret = odb_source_find_abbrev_len(&files->loose->base, oid, len, &len);
if (ret < 0)
goto out;
@@ -151,7 +153,7 @@ static int odb_source_files_freshen_object(struct odb_source *source,
{
struct odb_source_files *files = odb_source_files_downcast(source);
if (packfile_store_freshen_object(files->packed, oid) ||
odb_source_loose_freshen_object(source, oid))
odb_source_freshen_object(&files->loose->base, oid))
return 1;
return 0;
}
@@ -163,8 +165,9 @@ static int odb_source_files_write_object(struct odb_source *source,
struct object_id *compat_oid,
enum odb_write_object_flags flags)
{
return odb_source_loose_write_object(source, buf, len, type,
oid, compat_oid, flags);
struct odb_source_files *files = odb_source_files_downcast(source);
return odb_source_write_object(&files->loose->base, buf, len, type,
oid, compat_oid, flags);
}
static int odb_source_files_write_object_stream(struct odb_source *source,
@@ -172,7 +175,8 @@ static int odb_source_files_write_object_stream(struct odb_source *source,
size_t len,
struct object_id *oid)
{
return odb_source_loose_write_stream(source, stream, len, oid);
struct odb_source_files *files = odb_source_files_downcast(source);
return odb_source_write_object_stream(&files->loose->base, stream, len, oid);
}
static int odb_source_files_begin_transaction(struct odb_source *source,
@@ -264,7 +268,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
CALLOC_ARRAY(files, 1);
odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local);
files->loose = odb_source_loose_new(&files->base);
files->loose = odb_source_loose_new(odb, path, local);
files->packed = packfile_store_new(&files->base);
files->base.free = odb_source_files_free;

736
odb/source-loose.c Normal file
View File

@@ -0,0 +1,736 @@
#include "git-compat-util.h"
#include "abspath.h"
#include "chdir-notify.h"
#include "gettext.h"
#include "hex.h"
#include "loose.h"
#include "object-file.h"
#include "object-file-convert.h"
#include "odb.h"
#include "odb/source-files.h"
#include "odb/source-loose.h"
#include "odb/streaming.h"
#include "oidtree.h"
#include "repository.h"
#include "strbuf.h"
/*
 * Per-object callback used when populating the loose object cache: insert the
 * object ID into the oidtree handed over via `data`. The path of the object
 * file is not needed. Always returns 0.
 */
static int append_loose_object(const struct object_id *oid,
			       const char *path UNUSED,
			       void *data)
{
	struct oidtree *cache = data;

	oidtree_insert(cache, oid, NULL);
	return 0;
}
static struct oidtree *odb_source_loose_cache(struct odb_source_loose *loose,
const struct object_id *oid)
{
int subdir_nr = oid->hash[0];
struct strbuf buf = STRBUF_INIT;
size_t word_bits = bitsizeof(loose->subdir_seen[0]);
size_t word_index = subdir_nr / word_bits;
size_t mask = (size_t)1u << (subdir_nr % word_bits);
uint32_t *bitmap;
if (subdir_nr < 0 ||
(size_t) subdir_nr >= bitsizeof(loose->subdir_seen))
BUG("subdir_nr out of range");
bitmap = &loose->subdir_seen[word_index];
if (*bitmap & mask)
return loose->cache;
if (!loose->cache) {
ALLOC_ARRAY(loose->cache, 1);
oidtree_init(loose->cache);
}
strbuf_addstr(&buf, loose->base.path);
for_each_file_in_obj_subdir(subdir_nr, &buf,
loose->base.odb->repo->hash_algo,
append_loose_object,
NULL, NULL,
loose->cache);
*bitmap |= mask;
strbuf_release(&buf);
return loose->cache;
}
/*
 * Check whether `oid` is present in the loose object cache of `loose`,
 * loading the relevant part of the cache first if necessary. Returns 1 if
 * the object is contained in the cache and 0 otherwise.
 */
static int quick_has_loose(struct odb_source_loose *loose,
			   const struct object_id *oid)
{
	if (oidtree_contains(odb_source_loose_cache(loose, oid), oid))
		return 1;
	return 0;
}
/*
 * Look up information about the loose object `oid` stored at `path` and fill
 * in the fields requested via `oi`. `oi` may be NULL, in which case only the
 * existence of the object is checked.
 *
 * Two strategies are used:
 *
 *   - When neither type, size nor content is requested, the object file is
 *     never opened. Existence is answered from the loose object cache (only
 *     with OBJECT_INFO_QUICK and when neither disk size nor mtime is wanted)
 *     or via lstat(2).
 *
 *   - Otherwise the file is mapped, its header is inflated and parsed, and
 *     the content is unpacked if `oi->contentp` is set.
 *
 * Returns 0 on success; on failure the result is -1 or whatever error()
 * returned. On success with a non-NULL `oi`, `oi->whence` is set to OI_LOOSE.
 * With OBJECT_INFO_DIE_IF_CORRUPT, a failure to unpack or parse the object
 * ends in die() instead of an error return.
 */
static int read_object_info_from_path(struct odb_source_loose *loose,
const char *path,
const struct object_id *oid,
struct object_info *oi,
enum object_info_flags flags)
{
int ret;
int fd;
unsigned long mapsize;
void *map = NULL;
/* `stream_to_end` stays NULL until `stream` needs git_inflate_end(). */
git_zstream stream, *stream_to_end = NULL;
char hdr[MAX_HEADER_LEN];
/* Stand-ins for size/type outputs the caller did not request. */
unsigned long size_scratch;
enum object_type type_scratch;
struct stat st;
/*
* If we don't care about type or size, then we don't
* need to look inside the object at all. Note that we
* do not optimize out the stat call, even if the
* caller doesn't care about the disk-size, since our
* return value implicitly indicates whether the
* object even exists.
*/
if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
/* NOTE: shadows the function-level `st`; only used by the lstat() below. */
struct stat st;
if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
ret = quick_has_loose(loose, oid) ? 0 : -1;
goto out;
}
if (lstat(path, &st) < 0) {
ret = -1;
goto out;
}
if (oi) {
if (oi->disk_sizep)
*oi->disk_sizep = st.st_size;
if (oi->mtimep)
*oi->mtimep = st.st_mtime;
}
ret = 0;
goto out;
}
/* From here on `oi` is known to be non-NULL. */
fd = git_open(path);
if (fd < 0) {
/* A missing file is not reported; any other open failure is. */
if (errno != ENOENT)
error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
ret = -1;
goto out;
}
if (fstat(fd, &st)) {
close(fd);
ret = -1;
goto out;
}
mapsize = xsize_t(st.st_size);
if (!mapsize) {
/*
 * This jumps to `out`, not `corrupt`, so OBJECT_INFO_DIE_IF_CORRUPT
 * does not apply to empty object files.
 */
close(fd);
ret = error(_("object file %s is empty"), path);
goto out;
}
map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
/* The descriptor is no longer needed once the file is mapped. */
close(fd);
if (!map) {
ret = -1;
goto out;
}
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
if (oi->mtimep)
*oi->mtimep = st.st_mtime;
/* Mark the zlib stream for release at `out`. */
stream_to_end = &stream;
switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
case ULHR_OK:
/*
 * Point unrequested size/type outputs at scratch storage: `*oi->typep`
 * is dereferenced below and `*oi->sizep` is passed to
 * unpack_loose_rest(). Both are reset to NULL again at `out`.
 */
if (!oi->sizep)
oi->sizep = &size_scratch;
if (!oi->typep)
oi->typep = &type_scratch;
if (parse_loose_header(hdr, oi) < 0) {
ret = error(_("unable to parse %s header"), oid_to_hex(oid));
goto corrupt;
}
if (*oi->typep < 0)
die(_("invalid object type"));
if (oi->contentp) {
*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
if (!*oi->contentp) {
ret = -1;
goto corrupt;
}
}
break;
case ULHR_BAD:
ret = error(_("unable to unpack %s header"),
oid_to_hex(oid));
goto corrupt;
case ULHR_TOO_LONG:
ret = error(_("header for %s too long, exceeds %d bytes"),
oid_to_hex(oid), MAX_HEADER_LEN);
goto corrupt;
}
ret = 0;
/* Also reached on success (ret == 0), in which case nothing happens here. */
corrupt:
if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
die(_("loose object %s (stored in %s) is corrupt"),
oid_to_hex(oid), path);
/* Common cleanup for every exit path. */
out:
if (stream_to_end)
git_inflate_end(stream_to_end);
if (map)
munmap(map, mapsize);
if (oi) {
/* Do not leak pointers to our stack variables to the caller. */
if (oi->sizep == &size_scratch)
oi->sizep = NULL;
if (oi->typep == &type_scratch)
oi->typep = NULL;
/* The delta base is cleared regardless of success or failure. */
if (oi->delta_base_oid)
oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo);
if (!ret)
oi->whence = OI_LOOSE;
}
return ret;
}
static int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
struct object_info *oi,
enum object_info_flags flags)
{
struct odb_source_loose *loose = odb_source_loose_downcast(source);
static struct strbuf buf = STRBUF_INIT;
/*
* The second read shouldn't cause new loose objects to show up, unless
* there was a race condition with a secondary process. We don't care
* about this case though, so we simply skip reading loose objects a
* second time.
*/
if (flags & OBJECT_INFO_SECOND_READ)
return -1;
odb_loose_path(loose, &buf, oid);
return read_object_info_from_path(loose, buf.buf, oid, oi, flags);
}
/*
* Find "oid" as a loose object in given source, open the object and return its
* file descriptor. Returns the file descriptor on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to open_loose_object().
*/
static int open_loose_object(struct odb_source_loose *loose,
const struct object_id *oid, const char **path)
{
static struct strbuf buf = STRBUF_INIT;
int fd;
*path = odb_loose_path(loose, &buf, oid);
fd = git_open(*path);
if (fd >= 0)
return fd;
return -1;
}
/*
 * Map the loose object file of `oid` into memory for reading.
 *
 * On success the mapping is returned and its length is stored in `size`.
 * NULL is returned if the file cannot be opened, cannot be stat'ed, or is
 * empty; the latter case additionally reports an error. The file descriptor
 * is always closed before returning.
 */
static void *odb_source_loose_map_object(struct odb_source_loose *loose,
					 const struct object_id *oid,
					 unsigned long *size)
{
	const char *object_path;
	struct stat sb;
	void *mapping = NULL;
	int fd;

	fd = open_loose_object(loose, oid, &object_path);
	if (fd < 0)
		return NULL;

	if (fstat(fd, &sb) == 0) {
		*size = xsize_t(sb.st_size);
		if (*size)
			mapping = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
		else
			/* mmap() is forbidden on empty files */
			error(_("object file %s is empty"), object_path);
	}

	close(fd);
	return mapping;
}
/*
 * State of a streaming read from a single loose object. `base` is the generic
 * stream handed out to callers; read_istream_loose() and
 * close_istream_loose() recover this structure from it via container_of().
 */
struct odb_loose_read_stream {
struct odb_read_stream base;
/* zlib inflate state for the object data. */
git_zstream z;
/* Whether inflation is still ongoing, has reached the end, or has failed. */
enum {
ODB_LOOSE_READ_STREAM_INUSE,
ODB_LOOSE_READ_STREAM_DONE,
ODB_LOOSE_READ_STREAM_ERROR,
} z_state;
/* Mapping of the loose object file and its length; unmapped on close. */
void *mapped;
unsigned long mapsize;
/*
 * Output of the initial header inflation. The bytes in the range
 * [hdr_used, hdr_avail) are copied to the reader before any further data
 * is inflated.
 *
 * NOTE(review): 32 matches MAX_HEADER_LEN from "object-file.h"; consider
 * using the macro here.
 */
char hdr[32];
int hdr_avail;
int hdr_used;
};
/*
 * Read callback for loose object streams: store up to `sz` bytes of object
 * data in `buf`.
 *
 * Bytes left over from the initial header inflation are handed out first,
 * after which the remainder is inflated directly into `buf`.
 *
 * Returns the number of bytes stored in `buf`, 0 if the stream had already
 * been read to its end, and -1 on error. Once an error has occurred, every
 * subsequent call fails as well.
 */
static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
{
	struct odb_loose_read_stream *stream =
		container_of(_st, struct odb_loose_read_stream, base);
	unsigned char *out = (unsigned char *)buf;
	size_t filled = 0;

	if (stream->z_state == ODB_LOOSE_READ_STREAM_DONE)
		return 0;
	if (stream->z_state == ODB_LOOSE_READ_STREAM_ERROR)
		return -1;

	/* Drain what the header inflation produced beyond the header itself. */
	if (stream->hdr_used < stream->hdr_avail) {
		size_t pending = stream->hdr_avail - stream->hdr_used;
		size_t chunk = sz < pending ? sz : pending;

		memcpy(buf, stream->hdr + stream->hdr_used, chunk);
		stream->hdr_used += chunk;
		filled += chunk;
	}

	while (filled < sz) {
		int status;

		stream->z.next_out = out + filled;
		stream->z.avail_out = sz - filled;
		status = git_inflate(&stream->z, Z_FINISH);
		filled = stream->z.next_out - out;

		if (status == Z_STREAM_END) {
			git_inflate_end(&stream->z);
			stream->z_state = ODB_LOOSE_READ_STREAM_DONE;
			break;
		}

		/*
		 * Z_BUF_ERROR is only tolerated when the caller's buffer has
		 * been filled completely.
		 */
		if (status == Z_OK ||
		    (status == Z_BUF_ERROR && filled >= sz))
			continue;

		git_inflate_end(&stream->z);
		stream->z_state = ODB_LOOSE_READ_STREAM_ERROR;
		return -1;
	}

	return filled;
}
/*
 * Close callback for loose object streams. The zlib state is only released
 * if the read callback has not already done so (i.e. the stream is still in
 * use); the mapping of the object file is always removed. Always returns 0.
 */
static int close_istream_loose(struct odb_read_stream *_st)
{
	struct odb_loose_read_stream *stream =
		container_of(_st, struct odb_loose_read_stream, base);

	switch (stream->z_state) {
	case ODB_LOOSE_READ_STREAM_INUSE:
		git_inflate_end(&stream->z);
		break;
	default:
		break;
	}

	munmap(stream->mapped, stream->mapsize);
	return 0;
}
/*
 * `read_object_stream()` callback of the loose source: open a read stream
 * for the loose object `oid`.
 *
 * The object file is mapped and its header is inflated and parsed so that
 * the type and size of the stream are known up front. On success the new
 * stream is stored in `out` and 0 is returned; the mapping is then released
 * by the stream's close callback. Returns -1 if the object cannot be mapped
 * or if its header cannot be unpacked or parsed.
 */
static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
					       struct odb_source *source,
					       const struct object_id *oid)
{
	struct odb_source_loose *loose = odb_source_loose_downcast(source);
	struct object_info oi = OBJECT_INFO_INIT;
	struct odb_loose_read_stream *stream;
	unsigned long map_len;
	unsigned long parsed_size;
	void *map;

	map = odb_source_loose_map_object(loose, oid, &map_len);
	if (!map)
		return -1;

	/*
	 * The stream has to be allocated before we know whether we will
	 * succeed: the zlib state must be initialized in its final location
	 * because it contains self-referencing pointers and thus cannot be
	 * copied around afterwards.
	 */
	CALLOC_ARRAY(stream, 1);

	if (unpack_loose_header(&stream->z, map, map_len, stream->hdr,
				sizeof(stream->hdr)) != ULHR_OK)
		goto error;

	/*
	 * object_info.sizep is unsigned long* (32-bit on Windows), whereas
	 * the stream's size is size_t (64-bit), so go through a temporary.
	 * Loose objects larger than 4GB would still be truncated here, but
	 * those are uncommon as such objects would normally be packed.
	 */
	oi.typep = &stream->base.type;
	oi.sizep = &parsed_size;
	if (parse_loose_header(stream->hdr, &oi) < 0)
		goto error;
	if (stream->base.type < 0)
		goto error;

	stream->base.size = parsed_size;
	stream->base.read = read_istream_loose;
	stream->base.close = close_istream_loose;

	stream->mapped = map;
	stream->mapsize = map_len;
	/* Skip the NUL-terminated header; the rest is already object data. */
	stream->hdr_used = strlen(stream->hdr) + 1;
	stream->hdr_avail = stream->z.total_out;
	stream->z_state = ODB_LOOSE_READ_STREAM_INUSE;

	*out = &stream->base;
	return 0;

error:
	git_inflate_end(&stream->z);
	munmap(map, map_len);
	free(stream);
	return -1;
}
/*
 * Payload for the iteration wrappers below, which adapt the lower-level
 * loose object and oidtree iteration callbacks to `odb_for_each_object_cb`.
 */
struct for_each_object_wrapper_data {
/* The loose source that is being iterated over. */
struct odb_source_loose *loose;
/*
 * Optional object info request. If set, a copy is filled in for every
 * object and passed to `cb`; otherwise `cb` receives NULL.
 */
const struct object_info *request;
/* The caller's callback and its payload. */
odb_for_each_object_cb cb;
void *cb_data;
};
/*
 * Adapter invoked with the object ID and path of each loose object file.
 * Without an object info request the user callback is invoked right away.
 * Otherwise a private copy of the request is filled in from the object's
 * path first, and -1 is returned if that fails.
 */
static int for_each_object_wrapper_cb(const struct object_id *oid,
				      const char *path,
				      void *cb_data)
{
	struct for_each_object_wrapper_data *wrapper = cb_data;
	struct object_info oi;

	if (!wrapper->request)
		return wrapper->cb(oid, NULL, wrapper->cb_data);

	oi = *wrapper->request;
	if (read_object_info_from_path(wrapper->loose, path, oid, &oi, 0) < 0)
		return -1;

	return wrapper->cb(oid, &oi, wrapper->cb_data);
}
/*
 * Adapter used when iterating over the loose object cache. As no path is
 * available here, a requested object info is read via the generic
 * odb_source_read_object_info() of the loose source; -1 is returned if that
 * fails. Without a request the user callback is invoked right away.
 */
static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid,
					       void *node_data UNUSED,
					       void *cb_data)
{
	struct for_each_object_wrapper_data *wrapper = cb_data;
	struct object_info oi;

	if (!wrapper->request)
		return wrapper->cb(oid, NULL, wrapper->cb_data);

	oi = *wrapper->request;
	if (odb_source_read_object_info(&wrapper->loose->base,
					oid, &oi, 0) < 0)
		return -1;

	return wrapper->cb(oid, &oi, wrapper->cb_data);
}
/*
 * `for_each_object()` callback of the loose source: invoke `cb` for every
 * loose object, optionally together with the object info described by
 * `request`.
 *
 * Nothing is yielded when only promisor objects are wanted, or when only
 * local objects are wanted and this source is not local. With a prefix, the
 * matching objects are taken from the loose object cache; otherwise all
 * loose object files of the source are walked.
 */
static int odb_source_loose_for_each_object(struct odb_source *source,
					    const struct object_info *request,
					    odb_for_each_object_cb cb,
					    void *cb_data,
					    const struct odb_for_each_object_options *opts)
{
	struct odb_source_loose *loose = odb_source_loose_downcast(source);
	struct for_each_object_wrapper_data wrapper = {
		.loose = loose,
		.request = request,
		.cb = cb,
		.cb_data = cb_data,
	};
	struct oidtree *cache;

	/*
	 * There are no loose promisor objects, and non-local sources have
	 * nothing to offer when only local objects were asked for.
	 */
	if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) ||
	    ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local))
		return 0;

	if (!opts->prefix)
		return for_each_loose_file_in_source(source,
						     for_each_object_wrapper_cb,
						     NULL, NULL, &wrapper);

	cache = odb_source_loose_cache(loose, opts->prefix);
	return oidtree_each(cache, opts->prefix, opts->prefix_hex_len,
			    for_each_prefixed_object_wrapper_cb, &wrapper);
}
/* State carried through find_abbrev_len_cb() while scanning candidates. */
struct find_abbrev_len_data {
/* The object ID that an abbreviation length is computed for. */
const struct object_id *oid;
/*
 * Abbreviation length found so far, in hex digits. Starts at the requested
 * minimum and is only ever raised by the callback.
 */
unsigned len;
};
/*
 * Callback that raises the abbreviation length whenever a candidate object
 * shares at least as many leading hex digits with the target as the current
 * length covers. Always returns 0 so that iteration continues.
 */
static int find_abbrev_len_cb(const struct object_id *oid,
			      struct object_info *oi UNUSED,
			      void *cb_data)
{
	struct find_abbrev_len_data *state = cb_data;
	unsigned shared = oid_common_prefix_hexlen(oid, state->oid);

	/* A candidate matching in all hex digits cannot be told apart. */
	if (shared == hash_algos[oid->algo].hexsz)
		return 0;

	/* The current length already distinguishes this candidate. */
	if (shared < state->len)
		return 0;

	/* One more digit than the common prefix is needed. */
	state->len = shared + 1;
	return 0;
}
static int odb_source_loose_find_abbrev_len(struct odb_source *source,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
struct odb_source_loose *loose = odb_source_loose_downcast(source);
struct odb_for_each_object_options opts = {
.prefix = oid,
.prefix_hex_len = min_len,
};
struct find_abbrev_len_data data = {
.oid = oid,
.len = min_len,
};
int ret;
ret = odb_source_for_each_object(&loose->base, NULL, find_abbrev_len_cb,
&data, &opts);
*out = data.len;
return ret;
}
/*
 * Callback that bumps the `unsigned long` counter passed as payload by one
 * for every object it is invoked for. Always returns 0.
 */
static int count_loose_object(const struct object_id *oid UNUSED,
			      struct object_info *oi UNUSED,
			      void *payload)
{
	unsigned long *counter = payload;

	*counter += 1;
	return 0;
}
/*
 * Implementation of the `count_objects()` callback for the loose source.
 *
 * With ODB_COUNT_OBJECTS_APPROXIMATE, only the "17" shard directory is read:
 * entries whose name consists of exactly `hexsz - 2` lowercase hex digits
 * are counted and the result is multiplied by 256 (presumably the number of
 * two-hex-digit shard directories -- confirm). A missing shard directory
 * counts as zero objects; any other failure to open it is reported as an
 * error and leaves `*out` untouched.
 *
 * Without that flag every loose object of the source is counted exactly.
 *
 * Returns 0 on success and a negative value on error.
 */
static int odb_source_loose_count_objects(struct odb_source *source,
					  enum odb_count_objects_flags flags,
					  unsigned long *out)
{
	struct odb_source_loose *loose = odb_source_loose_downcast(source);
	const unsigned name_len = source->odb->repo->hash_algo->hexsz - 2;
	unsigned long sampled = 0;
	struct dirent *entry;
	char *shard_path;
	DIR *shard;
	int status = 0;

	if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) {
		struct odb_for_each_object_options opts = { 0 };

		*out = 0;
		return odb_source_for_each_object(&loose->base, NULL,
						  count_loose_object,
						  out, &opts);
	}

	shard_path = xstrfmt("%s/17", source->path);
	shard = opendir(shard_path);
	if (!shard) {
		/* A shard that does not exist simply holds no objects. */
		if (errno == ENOENT)
			*out = 0;
		else
			status = error_errno("cannot open object shard '%s'",
					     shard_path);
		free(shard_path);
		return status;
	}

	for (entry = readdir(shard); entry; entry = readdir(shard)) {
		const char *name = entry->d_name;

		/* Only names made up of the expected number of hex digits. */
		if (strspn(name, "0123456789abcdef") == name_len &&
		    name[name_len] == '\0')
			sampled++;
	}

	*out = sampled * 256;

	closedir(shard);
	free(shard_path);
	return 0;
}
/*
 * Implementation of the `freshen_object()` callback for the loose source.
 *
 * Computes the loose object path for `oid` and passes it to
 * check_and_freshen_file(). Returns 1 if that call reports a non-zero
 * result and 0 otherwise.
 *
 * The path buffer is static and reused across calls, so this function is
 * not reentrant.
 */
static int odb_source_loose_freshen_object(struct odb_source *source,
					   const struct object_id *oid)
{
	static struct strbuf path = STRBUF_INIT;
	struct odb_source_loose *loose = odb_source_loose_downcast(source);

	odb_loose_path(loose, &path, oid);

	return check_and_freshen_file(path.buf, 1) ? 1 : 0;
}
/*
 * Implementation of the `write_object()` callback for the loose source.
 *
 * Hashes `buf` (of `len` bytes) as an object of the given `type`, stores
 * the resulting object ID in `oid` and writes the object in loose format
 * unless the object database can freshen an existing copy.
 *
 * When the repository has a compatibility hash algorithm, the object ID in
 * that algorithm is determined as well -- taken from `compat_oid_in` if
 * given, computed otherwise -- and recorded in the loose object map after a
 * successful write.
 *
 * Returns 0 on success or when the object was freshened instead of
 * written, -1 when writing the loose object failed, and otherwise the
 * result of updating the loose object map.
 */
static int odb_source_loose_write_object(struct odb_source *source,
					 const void *buf, unsigned long len,
					 enum object_type type, struct object_id *oid,
					 struct object_id *compat_oid_in,
					 enum odb_write_object_flags flags)
{
	struct odb_source_loose *loose = odb_source_loose_downcast(source);
	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
	struct object_id compat_oid;
	char hdr[MAX_HEADER_LEN];
	int hdrlen = sizeof(hdr);

	/* Generate compat_oid */
	if (compat && compat_oid_in) {
		/* The caller already knows the compat object ID. */
		oidcpy(&compat_oid, compat_oid_in);
	} else if (compat && type == OBJ_BLOB) {
		/* Blobs are hashed as-is with the compat algorithm. */
		hash_object_file(compat, buf, len, type, &compat_oid);
	} else if (compat) {
		/* Other types are converted first and then hashed. */
		struct strbuf converted = STRBUF_INIT;

		convert_object_file(source->odb->repo, &converted, algo, compat,
				    buf, len, type, 0);
		hash_object_file(compat, converted.buf, converted.len,
				 type, &compat_oid);
		strbuf_release(&converted);
	}

	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);

	/*
	 * Normally if we have it in the pack then we do not bother writing
	 * it out into .git/objects/??/?{38} file.
	 */
	if (odb_freshen_object(source->odb, oid))
		return 0;

	if (write_loose_object(loose, oid, hdr, hdrlen, buf, len, 0, flags))
		return -1;

	return compat ? repo_add_loose_object_map(loose, oid, &compat_oid) : 0;
}
/*
 * Implementation of the `write_object_stream()` callback for the loose
 * source. Forwards all arguments to odb_source_loose_write_stream() and
 * returns its result.
 */
static int odb_source_loose_write_object_stream(struct odb_source *source,
						struct odb_write_stream *in_stream,
						size_t len,
						struct object_id *oid)
{
	/*
	 * TODO: the implementation should be moved here, see the comment on
	 * the called function in "object-file.h".
	 */
	return odb_source_loose_write_stream(odb_source_loose_downcast(source),
					     in_stream, len, oid);
}
/*
 * Implementation of the `begin_transaction()` callback for the loose
 * source. Transactions are not implemented: both arguments are ignored and
 * the result of error() is returned.
 */
static int odb_source_loose_begin_transaction(struct odb_source *source UNUSED,
struct odb_transaction **out UNUSED)
{
/* TODO: this is a known omission that we'll want to address eventually. */
return error("loose source does not support transactions");
}
/*
 * Implementation of the `read_alternates()` callback for the loose source.
 * Nothing is ever added to `out`; the function always returns 0.
 */
static int odb_source_loose_read_alternates(struct odb_source *source UNUSED,
struct strvec *out UNUSED)
{
return 0;
}
/*
 * Implementation of the `write_alternate()` callback for the loose source.
 * Alternates cannot be written: both arguments are ignored and the result
 * of error() is returned.
 */
static int odb_source_loose_write_alternate(struct odb_source *source UNUSED,
const char *alternate UNUSED)
{
return error("loose source does not support alternates");
}
/*
 * Drop the loose object cache of `loose`: the oidtree is cleared and freed,
 * and the record of already-seen subdirectories is reset so that the cache
 * is rebuilt from scratch on next use.
 */
static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
{
	oidtree_clear(loose->cache);
	FREE_AND_NULL(loose->cache);

	memset(loose->subdir_seen, 0, sizeof(loose->subdir_seen));
}
/*
 * Implementation of the `reprepare()` callback for the loose source, which
 * discards the loose object cache.
 */
static void odb_source_loose_reprepare(struct odb_source *source)
{
	odb_source_loose_clear_cache(odb_source_loose_downcast(source));
}
/*
 * Implementation of the `close()` callback for the loose source. The
 * function body is intentionally empty.
 */
static void odb_source_loose_close(struct odb_source *source UNUSED)
{
/* Nothing to do. */
}
/*
 * chdir-notify callback that replaces the path of the loose source passed
 * as `cb_data` with the result of reparent_relative_path() for the move
 * from `old_cwd` to `new_cwd`. The previous path string is freed.
 */
static void odb_source_loose_reparent(const char *name UNUSED,
				      const char *old_cwd,
				      const char *new_cwd,
				      void *cb_data)
{
	struct odb_source_loose *loose = cb_data;
	char *stale_path = loose->base.path;

	loose->base.path = reparent_relative_path(old_cwd, new_cwd, stale_path);
	free(stale_path);
}
/*
 * Implementation of the `free()` callback for the loose source. Releases
 * everything the source holds and finally frees the source itself, so the
 * pointer must not be used afterwards.
 */
static void odb_source_loose_free(struct odb_source *source)
{
	struct odb_source_loose *self = odb_source_loose_downcast(source);

	/* Tear down the state specific to the loose backend. */
	odb_source_loose_clear_cache(self);
	loose_object_map_clear(&self->map);

	/* Stop tracking working directory changes for this source. */
	chdir_notify_unregister(NULL, odb_source_loose_reparent, self);

	/* Release the generic part last, then the allocation itself. */
	odb_source_release(&self->base);
	free(self);
}
/*
 * Allocate a new loose object source for the object directory at `path`
 * that belongs to `odb`, and wire up all backend callbacks.
 *
 * If the stored path is relative, the source registers for chdir
 * notifications so that the path is rewritten via
 * odb_source_loose_reparent() when the working directory changes.
 *
 * The returned source is released through its `free` callback.
 */
struct odb_source_loose *odb_source_loose_new(struct object_database *odb,
					      const char *path,
					      bool local)
{
	struct odb_source_loose *src;

	CALLOC_ARRAY(src, 1);
	odb_source_init(&src->base, odb, ODB_SOURCE_LOOSE, path, local);

	/* Lifecycle. */
	src->base.free = odb_source_loose_free;
	src->base.close = odb_source_loose_close;
	src->base.reprepare = odb_source_loose_reprepare;

	/* Reading and enumerating objects. */
	src->base.read_object_info = odb_source_loose_read_object_info;
	src->base.read_object_stream = odb_source_loose_read_object_stream;
	src->base.for_each_object = odb_source_loose_for_each_object;
	src->base.find_abbrev_len = odb_source_loose_find_abbrev_len;
	src->base.count_objects = odb_source_loose_count_objects;

	/* Writing objects. */
	src->base.freshen_object = odb_source_loose_freshen_object;
	src->base.write_object = odb_source_loose_write_object;
	src->base.write_object_stream = odb_source_loose_write_object_stream;
	src->base.begin_transaction = odb_source_loose_begin_transaction;

	/* Alternates. */
	src->base.read_alternates = odb_source_loose_read_alternates;
	src->base.write_alternate = odb_source_loose_write_alternate;

	/* Relative paths must follow changes of the working directory. */
	if (!is_absolute_path(src->base.path))
		chdir_notify_register(NULL, odb_source_loose_reparent, src);

	return src;
}

48
odb/source-loose.h Normal file
View File

@@ -0,0 +1,48 @@
#ifndef ODB_SOURCE_LOOSE_H
#define ODB_SOURCE_LOOSE_H
#include "odb/source.h"
struct odb_source_files;
struct object_database;
struct oidtree;
/*
 * An object database source that stores its objects in loose format, one
 * file per object.
 */
struct odb_source_loose {
/*
 * Generic source that this backend embeds; odb_source_loose_downcast()
 * recovers the backend from a pointer to this member.
 */
struct odb_source base;
/*
 * Used to store the results of readdir(3) calls when we are OK
 * sacrificing accuracy due to races for speed. That includes
 * object existence with OBJECT_INFO_QUICK, as well as
 * our search for unique abbreviated hashes. Don't use it for tasks
 * requiring greater accuracy!
 *
 * Be sure to call odb_load_loose_cache() before using.
 *
 * NOTE(review): the implementation in this series obtains the cache via
 * odb_source_loose_cache(); confirm whether the function name above is
 * still current.
 */
uint32_t subdir_seen[8]; /* 256 bits */
struct oidtree *cache;
/* Map between object IDs for loose objects. */
struct loose_object_map *map;
};
/*
 * Allocate and initialize a new loose object source for the object
 * directory `path` that belongs to `odb`. `local` is passed on to the
 * generic source initialization.
 */
struct odb_source_loose *odb_source_loose_new(struct object_database *odb,
const char *path,
bool local);
/*
 * Convert a generic object database source into the loose backend that
 * embeds it. Passing a source that does not use this backend triggers a
 * BUG.
 */
static inline struct odb_source_loose *odb_source_loose_downcast(struct odb_source *source)
{
	const int is_loose = (source->type == ODB_SOURCE_LOOSE);

	if (!is_loose)
		BUG("trying to downcast source of type '%d' to loose", source->type);

	return container_of(source, struct odb_source_loose, base);
}
#endif

View File

@@ -14,6 +14,9 @@ enum odb_source_type {
/* The "files" backend that uses loose objects and packfiles. */
ODB_SOURCE_FILES,
/* The "loose" backend that uses loose objects, only. */
ODB_SOURCE_LOOSE,
/* The "in-memory" backend that stores objects in memory. */
ODB_SOURCE_INMEMORY,
};