From 027e3b3d38fa7989b17bdf60501d5f1617141688 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 4 Jun 2026 09:46:25 +0200 Subject: [PATCH 1/8] t0001: plug test gaps for git-init(1) with GIT_OBJECT_DIRECTORY In subsequent commits we'll rework how we set up the repository. This is a somewhat intricate and thus fragile sequence; there's many things that can go subtly wrong, and there are lots of interesting interactions that one can discover. One such discovered edge case was the interaction between git-init(1) and the "GIT_OBJECT_DIRECTORY" environment variable. When set, the behaviour is that the object directory should be created at the path that the variable points to. This behaviour is documented as such in its man page: If the object storage directory is specified via the GIT_OBJECT_DIRECTORY environment variable then the sha1 directories are created underneath; otherwise, the default $GIT_DIR/objects directory is used. Curiously enough though we don't seem to have any tests that exercise this directly, and thus a subsequent commit inadvertently would have broken this expectation. Plug this test gap. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t0001-init.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/t/t0001-init.sh b/t/t0001-init.sh index e4d32bb4d2..e89feca544 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -980,4 +980,14 @@ test_expect_success 're-init reads matching includeIf.onbranch' ' test_cmp expect err ' +test_expect_success 'init honors GIT_OBJECT_DIRECTORY' ' + test_when_finished "rm -rf init-objdir custom-odb" && + mkdir custom-odb && + env GIT_OBJECT_DIRECTORY="$(pwd)/custom-odb" \ + git init init-objdir && + test_path_is_missing init-objdir/.git/objects/pack && + test_path_is_dir custom-odb/pack && + test_path_is_dir custom-odb/info +' + test_done From 452ad8db6d9155d6c7305d6045d29c49a7cc9c7c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 4 Jun 2026 09:46:26 +0200 Subject: [PATCH 2/8] setup: drop `setup_git_env()` The `setup_git_env()` function is a trivial wrapper around `setup_git_env_internal()` and has a single call site only. Drop the function. While at it, drop stale documentation in "environment.h" that points to this function, even though it hasn't been exposed to callers outside of "setup.c" since 43ad1047a9 (setup: stop using `the_repository` in `setup_git_env()`, 2026-03-27) anymore. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- environment.h | 8 +------- refs.c | 3 ++- setup.c | 7 +------ 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/environment.h b/environment.h index 9eb97b3869..ccfcf37bfb 100644 --- a/environment.h +++ b/environment.h @@ -130,13 +130,6 @@ void repo_config_values_init(struct repo_config_values *cfg); * `the_repository`. We should eventually get rid of these and make the * dependency on a repository explicit: * - * - `setup_git_env()` ideally shouldn't exist as it modifies global state, - * namely the environment. The current process shouldn't ever access that - * state via envvars though, but should instead consult a `struct - * repository`. When spawning new processes, we would ideally also pass a - * `struct repository` and then set up the environment variables for the - * child process, only. - * * - `have_git_dir()` should not have to exist at all. Instead, we should * decide on whether or not we have a `struct repository`. * @@ -147,6 +140,7 @@ void repo_config_values_init(struct repo_config_values *cfg); * Please do not add new global config variables here. */ # ifdef USE_THE_REPOSITORY_VARIABLE + /* * Returns true iff we have a configured git repository (either via * setup_git_directory, or in the environment via $GIT_DIR). diff --git a/refs.c b/refs.c index 0f3355d2ee..e7070eb743 100644 --- a/refs.c +++ b/refs.c @@ -126,7 +126,8 @@ struct ref_namespace_info ref_namespace[] = { * points to the content of another. Unlike the other * ref namespaces, this one can be changed by the * GIT_REPLACE_REF_BASE environment variable. This - * .namespace value will be overwritten in setup_git_env(). + * .namespace value will be overwritten during repository + * setup. */ .ref = "refs/replace/", .decoration = DECORATION_GRAFTED, diff --git a/setup.c b/setup.c index d723306dfe..252b443117 100644 --- a/setup.c +++ b/setup.c @@ -1074,11 +1074,6 @@ static void setup_git_env_internal(struct repository *repo, fetch_if_missing = 0; } -static void setup_git_env(struct repository *repo, const char *git_dir) -{ - setup_git_env_internal(repo, git_dir, false); -} - static void set_git_dir_1(struct repository *repo, const char *path, bool skip_initializing_odb) { xsetenv(GIT_DIR_ENVIRONMENT, path, 1); @@ -2023,7 +2018,7 @@ const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; - setup_git_env(repo, gitdir); + setup_git_env_internal(repo, gitdir, false); } if (startup_info->have_repository) { repo_set_hash_algo(repo, repo_fmt.hash_algo); From 3d884b0b5656fe012002edd6bb8f36a125e6c17e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 4 Jun 2026 09:46:27 +0200 Subject: [PATCH 3/8] setup: deduplicate logic to apply repository format After having discovered the repository format we then apply it to the repository so that it knows to use the proper repository extensions. The logic to apply the format is duplicated across three callsites, which makes it rather painfull to add new extensions. Introduce a new function `apply_repository_format()` that takes a repo and applies a given format to it and adapt all callsites to use it. This function is also the new caller of `verify_repository_format()` so that we can ensure that we never apply an invalid repository format. The verification we have in `read_and_verify_repository_format()` is thus redundant now and dropped. Rename `read_and_verify_repository_format()` accordingly. While at it, also rename `check_repository_format()` to clarify that it doesn't only _check_ the format, but that it also applies it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- repository.c | 31 +++++++----------- setup.c | 93 ++++++++++++++++++++++++++++------------------------ setup.h | 10 ++++++ 3 files changed, 71 insertions(+), 63 deletions(-) diff --git a/repository.c b/repository.c index db57b8308b..58a13f7c4f 100644 --- a/repository.c +++ b/repository.c @@ -262,8 +262,8 @@ void repo_set_worktree(struct repository *repo, const char *path) trace2_def_repo(repo); } -static int read_and_verify_repository_format(struct repository_format *format, - const char *commondir) +static int read_repository_format_from_commondir(struct repository_format *format, + const char *commondir) { int ret = 0; struct strbuf sb = STRBUF_INIT; @@ -272,11 +272,6 @@ static int read_and_verify_repository_format(struct repository_format *format, read_repository_format(format, sb.buf); strbuf_reset(&sb); - if (verify_repository_format(format, &sb) < 0) { - warning("%s", sb.buf); - ret = -1; - } - strbuf_release(&sb); return ret; } @@ -290,6 +285,8 @@ int repo_init(struct repository *repo, const char *worktree) { struct repository_format format = REPOSITORY_FORMAT_INIT; + struct strbuf err = STRBUF_INIT; + memset(repo, 0, sizeof(*repo)); initialize_repository(repo); @@ -297,21 +294,13 @@ int repo_init(struct repository *repo, if (repo_init_gitdir(repo, gitdir)) goto error; - if (read_and_verify_repository_format(&format, repo->commondir)) + if (read_repository_format_from_commondir(&format, repo->commondir)) goto error; - repo_set_hash_algo(repo, format.hash_algo); - repo_set_compat_hash_algo(repo, format.compat_hash_algo); - repo_set_ref_storage_format(repo, format.ref_storage_format, - format.ref_storage_payload); - repo->repository_format_worktree_config = format.worktree_config; - repo->repository_format_relative_worktrees = format.relative_worktrees; - repo->repository_format_precious_objects = format.precious_objects; - repo->repository_format_submodule_path_cfg = format.submodule_path_cfg; - - /* take ownership of format.partial_clone */ - repo->repository_format_partial_clone = format.partial_clone; - format.partial_clone = NULL; + if (apply_repository_format(repo, &format, &err) < 0) { + warning("%s", err.buf); + goto error; + } if (worktree) repo_set_worktree(repo, worktree); @@ -320,10 +309,12 @@ int repo_init(struct repository *repo, repo_read_loose_object_map(repo); clear_repository_format(&format); + strbuf_release(&err); return 0; error: clear_repository_format(&format); + strbuf_release(&err); repo_clear(repo); return -1; } diff --git a/setup.c b/setup.c index 252b443117..c5015923f1 100644 --- a/setup.c +++ b/setup.c @@ -750,8 +750,7 @@ static int check_repo_format(const char *var, const char *value, return read_worktree_config(var, value, ctx, vdata); } -static int check_repository_format_gently(struct repository *repo, - const char *gitdir, +static int check_repository_format_gently(const char *gitdir, struct repository_format *candidate, int *nongit_ok) { @@ -765,7 +764,7 @@ static int check_repository_format_gently(struct repository *repo, strbuf_release(&sb); /* - * For historical use of check_repository_format() in git-init, + * For historical use of check_and_apply_repository_format() in git-init, * we treat a missing config as a silent "ok", even when nongit_ok * is unset. */ @@ -782,8 +781,6 @@ static int check_repository_format_gently(struct repository *repo, die("%s", err.buf); } - repo->repository_format_precious_objects = candidate->precious_objects; - string_list_clear(&candidate->unknown_extensions, 0); string_list_clear(&candidate->v1_only_extensions, 0); @@ -1140,7 +1137,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, die(_("not a git repository: '%s'"), gitdirenv); } - if (check_repository_format_gently(repo, gitdirenv, repo_fmt, nongit_ok)) { + if (check_repository_format_gently(gitdirenv, repo_fmt, nongit_ok)) { free(gitfile); return NULL; } @@ -1217,7 +1214,7 @@ static const char *setup_discovered_git_dir(struct repository *repo, struct repository_format *repo_fmt, int *nongit_ok) { - if (check_repository_format_gently(repo, gitdir, repo_fmt, nongit_ok)) + if (check_repository_format_gently(gitdir, repo_fmt, nongit_ok)) return NULL; /* --work-tree is set without --git-dir; use discovered one */ @@ -1265,7 +1262,7 @@ static const char *setup_bare_git_dir(struct repository *repo, { int root_len; - if (check_repository_format_gently(repo, ".", repo_fmt, nongit_ok)) + if (check_repository_format_gently(".", repo_fmt, nongit_ok)) return NULL; setenv(GIT_IMPLICIT_WORK_TREE_ENVIRONMENT, "0", 1); @@ -1757,6 +1754,32 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } +int apply_repository_format(struct repository *repo, + const struct repository_format *format, + struct strbuf *err) +{ + if (verify_repository_format(format, err) < 0) + return -1; + + repo_set_hash_algo(repo, format->hash_algo); + repo_set_compat_hash_algo(repo, format->compat_hash_algo); + repo_set_ref_storage_format(repo, + format->ref_storage_format, + format->ref_storage_payload); + repo->repository_format_worktree_config = + format->worktree_config; + repo->repository_format_submodule_path_cfg = + format->submodule_path_cfg; + repo->repository_format_relative_worktrees = + format->relative_worktrees; + repo->repository_format_partial_clone = + xstrdup_or_null(format->partial_clone); + repo->repository_format_precious_objects = + format->precious_objects; + + return 0; +} + /* * Check the repository format version in the path found in repo_get_git_dir(repo), * and die if it is a version we don't understand. Generally one would @@ -1765,26 +1788,20 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, * * If successful and fmt is not NULL, fill fmt with data. */ -static void check_repository_format(struct repository *repo, struct repository_format *fmt) +static void check_and_apply_repository_format(struct repository *repo, + struct repository_format *fmt) { struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; + struct strbuf err = STRBUF_INIT; + if (!fmt) fmt = &repo_fmt; - check_repository_format_gently(repo, repo_get_git_dir(repo), fmt, NULL); + + check_repository_format_gently(repo_get_git_dir(repo), fmt, NULL); + if (apply_repository_format(repo, fmt, &err) < 0) + die("%s", err.buf); startup_info->have_repository = 1; - repo_set_hash_algo(repo, fmt->hash_algo); - repo_set_compat_hash_algo(repo, fmt->compat_hash_algo); - repo_set_ref_storage_format(repo, - fmt->ref_storage_format, - fmt->ref_storage_payload); - repo->repository_format_worktree_config = - fmt->worktree_config; - repo->repository_format_submodule_path_cfg = - fmt->submodule_path_cfg; - repo->repository_format_relative_worktrees = - fmt->relative_worktrees; - repo->repository_format_partial_clone = - xstrdup_or_null(fmt->partial_clone); + clear_repository_format(&repo_fmt); } @@ -1862,7 +1879,7 @@ const char *enter_repo(struct repository *repo, const char *path, unsigned flags if (is_git_directory(".")) { set_git_dir(repo, ".", 0); - check_repository_format(repo, NULL); + check_and_apply_repository_format(repo, NULL); return path; } @@ -2020,25 +2037,15 @@ const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; setup_git_env_internal(repo, gitdir, false); } + if (startup_info->have_repository) { - repo_set_hash_algo(repo, repo_fmt.hash_algo); - repo_set_compat_hash_algo(repo, - repo_fmt.compat_hash_algo); - repo_set_ref_storage_format(repo, - repo_fmt.ref_storage_format, - repo_fmt.ref_storage_payload); - repo->repository_format_worktree_config = - repo_fmt.worktree_config; - repo->repository_format_relative_worktrees = - repo_fmt.relative_worktrees; - repo->repository_format_submodule_path_cfg = - repo_fmt.submodule_path_cfg; - /* take ownership of repo_fmt.partial_clone */ - repo->repository_format_partial_clone = - repo_fmt.partial_clone; - repo_fmt.partial_clone = NULL; - repo->repository_format_precious_objects = - repo_fmt.precious_objects; + struct strbuf err = STRBUF_INIT; + + if (apply_repository_format(repo, &repo_fmt, &err) < 0) + die("%s", err.buf); + + clear_repository_format(&repo_fmt); + strbuf_release(&err); } } /* @@ -2814,7 +2821,7 @@ int init_db(struct repository *repo, * config file, so this will not fail. What we are catching * is an attempt to reinitialize new repository with an old tool. */ - check_repository_format(repo, &repo_fmt); + check_and_apply_repository_format(repo, &repo_fmt); repository_format_configure(repo, &repo_fmt, hash, ref_storage_format); diff --git a/setup.h b/setup.h index 9409326fe4..efbb82fdbf 100644 --- a/setup.h +++ b/setup.h @@ -221,6 +221,16 @@ void clear_repository_format(struct repository_format *format); int verify_repository_format(const struct repository_format *format, struct strbuf *err); +/* + * Apply the given repository format to the repo. This initializes extensions + * and basic data structures required for normal operation. Returns 0 on + * success, a negative error code when the format is not valid as determined by + * `verify_repository_format()`. + */ +int apply_repository_format(struct repository *repo, + const struct repository_format *format, + struct strbuf *err); + const char *get_template_dir(const char *option_template); #define INIT_DB_QUIET (1 << 0) From 6a2fbab4c95b0fc317514ec7ead618b3b37e3553 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 4 Jun 2026 09:46:28 +0200 Subject: [PATCH 4/8] repository: stop initializing the object database in `repo_set_gitdir()` The function `repo_set_gitdir()` obviously sets the Git directory for a given repository. Less obviously though, the function also configures a couple of auxiliary settings. One such thing is that we create the object database in this function. This logic only happens conditionally though, as `set_git_dir()` may be called multiple times during repository setup, and we don't want to create the object database multiple times. This is somewhat tangled and hard to follow. Remove the logic from `repo_set_gitdir()` and instead initialize the object database outside of it. This leads to some duplication right now, but that duplication will be removed in a subsequent step where we will start initializing the object database as part of applying the repo's format. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- repository.c | 8 ++------ repository.h | 3 --- setup.c | 7 ++++--- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/repository.c b/repository.c index 58a13f7c4f..2c2395105f 100644 --- a/repository.c +++ b/repository.c @@ -181,12 +181,6 @@ void repo_set_gitdir(struct repository *repo, free(old_gitdir); repo_set_commondir(repo, o->commondir); - - if (!repo->objects) - repo->objects = odb_new(repo, o->object_dir, o->alternate_db); - else if (!o->skip_initializing_odb) - BUG("cannot reinitialize an already-initialized object directory"); - repo->disable_ref_updates = o->disable_ref_updates; expand_base_dir(&repo->graft_file, o->graft_file, @@ -302,6 +296,8 @@ int repo_init(struct repository *repo, goto error; } + repo->objects = odb_new(repo, NULL, NULL); + if (worktree) repo_set_worktree(repo, worktree); diff --git a/repository.h b/repository.h index c3ec0f4b79..36e2db2633 100644 --- a/repository.h +++ b/repository.h @@ -221,12 +221,9 @@ const char *repo_get_work_tree(struct repository *repo); */ struct set_gitdir_args { const char *commondir; - const char *object_dir; const char *graft_file; const char *index_file; - const char *alternate_db; bool disable_ref_updates; - bool skip_initializing_odb; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/setup.c b/setup.c index c5015923f1..3bd3f6c592 100644 --- a/setup.c +++ b/setup.c @@ -1045,17 +1045,18 @@ static void setup_git_env_internal(struct repository *repo, struct strvec to_free = STRVEC_INIT; args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); - args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); - args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); if (getenv(GIT_QUARANTINE_ENVIRONMENT)) args.disable_ref_updates = true; - args.skip_initializing_odb = skip_initializing_odb; repo_set_gitdir(repo, git_dir, &args); strvec_clear(&to_free); + if (!skip_initializing_odb) + repo->objects = odb_new(repo, getenv_safe(&to_free, DB_ENVIRONMENT), + getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); + if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) disable_replace_refs(); replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); From aae4ebc895272dc7e5a9ccfc135878b55c7322d7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 4 Jun 2026 09:46:29 +0200 Subject: [PATCH 5/8] setup: stop creating the object database in `setup_git_env()` In the preceding commit we have stopped creating the object database in `repo_set_gitdir()`. But the logic is still somewhat confusing as we still end up creating it conditionally in `setup_git_dir()`, which is called multiple times. Drop the conditional logic and instead create the object database in all places where we have discovered and configured a repository. This leads to even more duplication than we already had in the preceding commit, but an alert reader may notice that we now (almost) always call `odb_new()` directly before having called `apply_repository_format()`. The only exception to this is `setup_git_directory_gently()`, where we also call the function when _not_ applying the repository format. This will be fixed in the next commit, and once that's done we can then unify creation of the object database into `apply_repository_format()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/setup.c b/setup.c index 3bd3f6c592..0dc9fe4565 100644 --- a/setup.c +++ b/setup.c @@ -1035,8 +1035,7 @@ cleanup_return: } static void setup_git_env_internal(struct repository *repo, - const char *git_dir, - bool skip_initializing_odb) + const char *git_dir) { char *git_replace_ref_base; const char *shallow_file; @@ -1053,10 +1052,6 @@ static void setup_git_env_internal(struct repository *repo, repo_set_gitdir(repo, git_dir, &args); strvec_clear(&to_free); - if (!skip_initializing_odb) - repo->objects = odb_new(repo, getenv_safe(&to_free, DB_ENVIRONMENT), - getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); - if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) disable_replace_refs(); replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); @@ -1072,10 +1067,10 @@ static void setup_git_env_internal(struct repository *repo, fetch_if_missing = 0; } -static void set_git_dir_1(struct repository *repo, const char *path, bool skip_initializing_odb) +static void set_git_dir_1(struct repository *repo, const char *path) { xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env_internal(repo, path, skip_initializing_odb); + setup_git_env_internal(repo, path); } static void update_relative_gitdir(const char *name UNUSED, @@ -1089,7 +1084,7 @@ static void update_relative_gitdir(const char *name UNUSED, trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); - set_git_dir_1(repo, path, true); + set_git_dir_1(repo, path); free(path); } @@ -1102,7 +1097,7 @@ static void set_git_dir(struct repository *repo, const char *path, int make_real path = realpath.buf; } - set_git_dir_1(repo, path, false); + set_git_dir_1(repo, path); if (!is_absolute_path(path)) chdir_notify_register(NULL, update_relative_gitdir, repo); @@ -1879,8 +1874,15 @@ const char *enter_repo(struct repository *repo, const char *path, unsigned flags } if (is_git_directory(".")) { + struct strvec to_free = STRVEC_INIT; + set_git_dir(repo, ".", 0); + repo->objects = odb_new(repo, + getenv_safe(&to_free, DB_ENVIRONMENT), + getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); check_and_apply_repository_format(repo, NULL); + + strvec_clear(&to_free); return path; } @@ -2032,13 +2034,19 @@ const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) startup_info->have_repository || /* GIT_DIR_EXPLICIT */ getenv(GIT_DIR_ENVIRONMENT)) { + struct strvec to_free = STRVEC_INIT; + if (!repo->gitdir) { const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; - setup_git_env_internal(repo, gitdir, false); + setup_git_env_internal(repo, gitdir); } + repo->objects = odb_new(repo, + getenv_safe(&to_free, DB_ENVIRONMENT), + getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); + if (startup_info->have_repository) { struct strbuf err = STRBUF_INIT; @@ -2048,6 +2056,8 @@ const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) clear_repository_format(&repo_fmt); strbuf_release(&err); } + + strvec_clear(&to_free); } /* * Since precompose_string_if_needed() needs to look at @@ -2796,6 +2806,7 @@ int init_db(struct repository *repo, int exist_ok = flags & INIT_DB_EXIST_OK; char *original_git_dir = real_pathdup(git_dir, 1); struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; + struct strvec to_free = STRVEC_INIT; if (real_git_dir) { struct stat st; @@ -2816,6 +2827,9 @@ int init_db(struct repository *repo, } startup_info->have_repository = 1; + repo->objects = odb_new(repo, getenv_safe(&to_free, DB_ENVIRONMENT), + getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); + /* * Check to see if the repository version is right. * Note that a newly created repository does not have @@ -2879,6 +2893,7 @@ int init_db(struct repository *repo, } clear_repository_format(&repo_fmt); + strvec_clear(&to_free); free(original_git_dir); return 0; } From d87de311ff506599ec130ba5f09a4f73e458a5ae Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 4 Jun 2026 09:46:30 +0200 Subject: [PATCH 6/8] setup: stop initializing object database without repository The function `setup_git_directory_gently()` is responsible for discovering and setting up a Git repository based on various environment variables and the current working directory. The result is thus a fully usable Git repository. One oddity of this function is that we may set up the object database even in the case where we don't have a repository, namely in the case where the `GIT_DIR_EXPLICIT` environment variable is set but points to a non-existent repository. If so, we call `setup_git_env_internal()` with the value of the environment variable so that the repository's Git directory is configured, even if it points to a non-existent directory. Historically though, this function didn't only configure the repository, but also initialized the object database. We retained this behaviour from a preceding commit, even though it really doesn't make much sense in the first place -- there is no repository, so we don't have an object database either. There seemingly isn't much of a reason to construct the object database, as we typically won't try to read objects when we don't have an object database. There's one exception though: git-index-pack(1) may run outside of a repository, which can be used to perform consistency checks for a packfile. The code path is _almost_ working: we already know to call `parse_object_buffer()`, which can read objects without an object database being available. And that works for all object types except for commits, because `parse_commit_buffer()` calls `parse_commit_graph()`, and that function doesn't handle the case where we don't have an object database. Fix this instance to check for the object database instead of checking for the Git directory having been initialized. With this fixed, we can now stop constructing an object database completely. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit-graph.c | 4 ++-- setup.c | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 9abe62bd5a..0820cf5fb8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -740,13 +740,13 @@ static struct commit_graph *prepare_commit_graph(struct repository *r) struct odb_source *source; /* - * Early return if there is no git dir or if the commit graph is + * Early return if there is no object database or if the commit graph is * disabled. * * This must come before the "already attempted?" check below, because * we want to disable even an already-loaded graph file. */ - if (!r->gitdir || r->commit_graph_disabled) + if (!r->objects || r->commit_graph_disabled) return NULL; if (r->objects->commit_graph_attempted) diff --git a/setup.c b/setup.c index 0dc9fe4565..4a8d6230b1 100644 --- a/setup.c +++ b/setup.c @@ -2043,13 +2043,12 @@ const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) setup_git_env_internal(repo, gitdir); } - repo->objects = odb_new(repo, - getenv_safe(&to_free, DB_ENVIRONMENT), - getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); - if (startup_info->have_repository) { struct strbuf err = STRBUF_INIT; + repo->objects = odb_new(repo, + getenv_safe(&to_free, DB_ENVIRONMENT), + getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); if (apply_repository_format(repo, &repo_fmt, &err) < 0) die("%s", err.buf); From a84a9d4acdae51f58529b2596c4bd935fe9af372 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 4 Jun 2026 09:46:31 +0200 Subject: [PATCH 7/8] repository: stop reading loose object map twice on repo init When initializing a repository via `repo_init()` we end up reading the loose object map twice: - `apply_repository_format()` calls `repo_set_compat_hash_algo()`, which in turn calls `repo_read_loose_object_map()` if we have a compatibility hash configured. - `repo_init()` calls `repo_read_loose_object_map()` directly a second time. Drop the second read of the loose object map in `repo_init()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- repository.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/repository.c b/repository.c index 2c2395105f..61dfbb8be6 100644 --- a/repository.c +++ b/repository.c @@ -301,9 +301,6 @@ int repo_init(struct repository *repo, if (worktree) repo_set_worktree(repo, worktree); - if (repo->compat_hash_algo) - repo_read_loose_object_map(repo); - clear_repository_format(&format); strbuf_release(&err); return 0; From 42b9d3dc9dfa9e733cbd6402e665ac35fce0c216 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 4 Jun 2026 09:46:32 +0200 Subject: [PATCH 8/8] setup: construct object database in `apply_repository_format()` With the preceding changes we now always construct the repository's object database before applying the repository format. Remove this duplication by constructing it in `apply_repository_format()` instead. Note that we create the object database _after_ having set up the repository's hash algorithm, but _before_ setting the compat hash algorithm. This is intentional: - Constructing the object database may require knowledge of its intended object format. - Setting up the compatibility hash requires the object database to be initialized already, because we immediately read the loose object map. The first point is sensible, the second maybe a little less so. Ideally, it should be the responsibility of the object database itself to initialize any data structures required for the compatibility hash. But this would require further changes, so this is kept as-is for now. Further note that this requires us to move handling of the environment variables GIT_OBJECT_DIRECTORY and GIT_ALTERNATE_OBJECT_DIRECTORIES into the repository format, as well. This allows the caller more flexibility around whether or not those environment variables are being honored, as we want to respect them in "setup.c", but not in "repository.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- repository.c | 4 +--- setup.c | 45 +++++++++++++++++++++------------------------ setup.h | 10 ++++++++++ 3 files changed, 32 insertions(+), 27 deletions(-) diff --git a/repository.c b/repository.c index 61dfbb8be6..187dd471c4 100644 --- a/repository.c +++ b/repository.c @@ -291,13 +291,11 @@ int repo_init(struct repository *repo, if (read_repository_format_from_commondir(&format, repo->commondir)) goto error; - if (apply_repository_format(repo, &format, &err) < 0) { + if (apply_repository_format(repo, &format, 0, &err) < 0) { warning("%s", err.buf); goto error; } - repo->objects = odb_new(repo, NULL, NULL); - if (worktree) repo_set_worktree(repo, worktree); diff --git a/setup.c b/setup.c index 4a8d6230b1..513fc88749 100644 --- a/setup.c +++ b/setup.c @@ -1752,12 +1752,22 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, int apply_repository_format(struct repository *repo, const struct repository_format *format, + enum apply_repository_format_flags flags, struct strbuf *err) { + char *object_directory = NULL, *alternate_object_directories = NULL; + if (verify_repository_format(format, err) < 0) return -1; + if (flags & APPLY_REPOSITORY_FORMAT_HONOR_ENV) { + object_directory = xstrdup_or_null(getenv(DB_ENVIRONMENT)); + alternate_object_directories = xstrdup_or_null(getenv(ALTERNATE_DB_ENVIRONMENT)); + } + repo_set_hash_algo(repo, format->hash_algo); + repo->objects = odb_new(repo, object_directory, + alternate_object_directories); repo_set_compat_hash_algo(repo, format->compat_hash_algo); repo_set_ref_storage_format(repo, format->ref_storage_format, @@ -1773,6 +1783,8 @@ int apply_repository_format(struct repository *repo, repo->repository_format_precious_objects = format->precious_objects; + free(alternate_object_directories); + free(object_directory); return 0; } @@ -1785,7 +1797,8 @@ int apply_repository_format(struct repository *repo, * If successful and fmt is not NULL, fill fmt with data. */ static void check_and_apply_repository_format(struct repository *repo, - struct repository_format *fmt) + struct repository_format *fmt, + enum apply_repository_format_flags flags) { struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; struct strbuf err = STRBUF_INIT; @@ -1794,7 +1807,7 @@ static void check_and_apply_repository_format(struct repository *repo, fmt = &repo_fmt; check_repository_format_gently(repo_get_git_dir(repo), fmt, NULL); - if (apply_repository_format(repo, fmt, &err) < 0) + if (apply_repository_format(repo, fmt, flags, &err) < 0) die("%s", err.buf); startup_info->have_repository = 1; @@ -1874,15 +1887,9 @@ const char *enter_repo(struct repository *repo, const char *path, unsigned flags } if (is_git_directory(".")) { - struct strvec to_free = STRVEC_INIT; - set_git_dir(repo, ".", 0); - repo->objects = odb_new(repo, - getenv_safe(&to_free, DB_ENVIRONMENT), - getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); - check_and_apply_repository_format(repo, NULL); - - strvec_clear(&to_free); + check_and_apply_repository_format(repo, NULL, + APPLY_REPOSITORY_FORMAT_HONOR_ENV); return path; } @@ -2034,8 +2041,6 @@ const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) startup_info->have_repository || /* GIT_DIR_EXPLICIT */ getenv(GIT_DIR_ENVIRONMENT)) { - struct strvec to_free = STRVEC_INIT; - if (!repo->gitdir) { const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) @@ -2046,17 +2051,13 @@ const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) if (startup_info->have_repository) { struct strbuf err = STRBUF_INIT; - repo->objects = odb_new(repo, - getenv_safe(&to_free, DB_ENVIRONMENT), - getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); - if (apply_repository_format(repo, &repo_fmt, &err) < 0) + if (apply_repository_format(repo, &repo_fmt, + APPLY_REPOSITORY_FORMAT_HONOR_ENV, &err) < 0) die("%s", err.buf); clear_repository_format(&repo_fmt); strbuf_release(&err); } - - strvec_clear(&to_free); } /* * Since precompose_string_if_needed() needs to look at @@ -2805,7 +2806,6 @@ int init_db(struct repository *repo, int exist_ok = flags & INIT_DB_EXIST_OK; char *original_git_dir = real_pathdup(git_dir, 1); struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; - struct strvec to_free = STRVEC_INIT; if (real_git_dir) { struct stat st; @@ -2826,16 +2826,14 @@ int init_db(struct repository *repo, } startup_info->have_repository = 1; - repo->objects = odb_new(repo, getenv_safe(&to_free, DB_ENVIRONMENT), - getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT)); - /* * Check to see if the repository version is right. * Note that a newly created repository does not have * config file, so this will not fail. What we are catching * is an attempt to reinitialize new repository with an old tool. */ - check_and_apply_repository_format(repo, &repo_fmt); + check_and_apply_repository_format(repo, &repo_fmt, + APPLY_REPOSITORY_FORMAT_HONOR_ENV); repository_format_configure(repo, &repo_fmt, hash, ref_storage_format); @@ -2892,7 +2890,6 @@ int init_db(struct repository *repo, } clear_repository_format(&repo_fmt); - strvec_clear(&to_free); free(original_git_dir); return 0; } diff --git a/setup.h b/setup.h index efbb82fdbf..19679fe78f 100644 --- a/setup.h +++ b/setup.h @@ -221,6 +221,15 @@ void clear_repository_format(struct repository_format *format); int verify_repository_format(const struct repository_format *format, struct strbuf *err); +enum apply_repository_format_flags { + /* + * Honor environment variables when applying the repository format to + * the repository. For now, this only covers environment variables that + * relate to the object database. + */ + APPLY_REPOSITORY_FORMAT_HONOR_ENV = (1 << 0), +}; + /* * Apply the given repository format to the repo. This initializes extensions * and basic data structures required for normal operation. Returns 0 on @@ -229,6 +238,7 @@ int verify_repository_format(const struct repository_format *format, */ int apply_repository_format(struct repository *repo, const struct repository_format *format, + enum apply_repository_format_flags flags, struct strbuf *err); const char *get_template_dir(const char *option_template);