From 5bd39784cda151203aa6d97e24c21bf7fddc11de Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 12 Jun 2026 17:49:12 -0400 Subject: [PATCH 1/3] commit-reach: reject cycles in contains walk The memoized contains traversal used by git tag assumes that commit ancestry is acyclic. Replacement refs can violate that assumption, causing it to keep pushing an already active commit until memory is exhausted. Mark commits while they are active and die if the traversal encounters an active commit. Other failures in this walk already die through parse_commit_or_die(); using a second reachability walk would only add a separate policy for malformed history. Suggested-by: Kristofer Karlsson Signed-off-by: Tamir Duberstein Signed-off-by: Junio C Hamano --- commit-reach.c | 12 +++++++++--- commit-reach.h | 3 ++- t/t7004-tag.sh | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index 5df471a313..58553dff05 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -757,7 +757,8 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) /* * Test whether the candidate is contained in the list. - * Do not recurse to find out, though, but return -1 if inconclusive. + * Do not recurse to find out, though, but return CONTAINS_UNKNOWN if + * inconclusive. */ static enum contains_result contains_test(struct commit *candidate, const struct commit_list *want, @@ -814,6 +815,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, if (result != CONTAINS_UNKNOWN) return result; + *contains_cache_at(cache, candidate) = CONTAINS_IN_PROGRESS; push_to_contains_stack(candidate, &contains_stack); while (contains_stack.nr) { struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1]; @@ -825,8 +827,8 @@ static enum contains_result contains_tag_algo(struct commit *candidate, contains_stack.nr--; } /* - * If we just popped the stack, parents->item has been marked, - * therefore contains_test will return a meaningful yes/no. + * A parent may have just been popped and marked, or may still + * be active when replacement refs create a cycle. */ else switch (contains_test(parents->item, want, cache, cutoff)) { case CONTAINS_YES: @@ -836,7 +838,11 @@ static enum contains_result contains_tag_algo(struct commit *candidate, case CONTAINS_NO: entry->parents = parents->next; break; + case CONTAINS_IN_PROGRESS: + die(_("commit ancestry contains a cycle")); case CONTAINS_UNKNOWN: + *contains_cache_at(cache, parents->item) = + CONTAINS_IN_PROGRESS; push_to_contains_stack(parents->item, &contains_stack); break; } diff --git a/commit-reach.h b/commit-reach.h index 3f3a563d8a..f908d305b1 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -73,7 +73,8 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid); enum contains_result { CONTAINS_UNKNOWN = 0, CONTAINS_NO, - CONTAINS_YES + CONTAINS_YES, + CONTAINS_IN_PROGRESS }; define_commit_slab(contains_cache, enum contains_result); diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index d918005dd9..67309494d2 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -1611,6 +1611,24 @@ test_expect_success 'checking that first commit is in all tags (hash)' ' test_cmp expected actual ' +test_expect_success 'tag --contains rejects cyclic replacement histories' ' + first=$(git rev-parse HEAD~2) && + second=$(git rev-parse HEAD~) && + third=$(git rev-parse HEAD) && + test_when_finished " + git replace -d $first && + git replace -d $third && + git tag -d cycle-a cycle-b + " && + git tag cycle-a "$first" && + git tag cycle-b "$third" && + git replace --graft "$first" "$third" "$second" && + git replace --graft "$third" "$first" && + test_must_fail git tag --contains="$second" --list "cycle-*" \ + >/dev/null 2>err && + test_grep "fatal: commit ancestry contains a cycle" err +' + # other ways of specifying the commit test_expect_success 'checking that first commit is in all tags (tag)' ' cat >expected <<-\EOF && From ca96eeee79bc231f8096d9e9d0b501e0495e2db7 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 12 Jun 2026 17:49:13 -0400 Subject: [PATCH 2/3] ref-filter: memoize --contains with generations git branch and git for-each-ref run a separate reachability walk for each ref considered by --contains and --no-contains. Refs with shared history therefore traverse the same commits repeatedly. git tag instead uses a depth-first walk that caches results across refs. That walk can perform poorly without generation numbers: a negative check may walk to the root instead of stopping at a nearby divergence. Generation numbers let it stop below the oldest target. Use the memoized walk for all ref-filter callers when generation numbers are available. Keep git tag on its existing path without generations. Caching still helps when many tags share deep history: ffc4b8012d (tag: speed up --contains calculation, 2011-06-11) reduced git tag --contains HEAD~200 in linux-2.6 from 15.417 to 5.329 seconds. The new shared-history perf test improves from 0.72 to 0.03 seconds. In a repository with 62,174 remote-tracking refs, running: git branch -r --contains c78ae85f3ce7e improves from 104.365 seconds to 468 milliseconds. Suggested-by: Jeff King Signed-off-by: Tamir Duberstein Signed-off-by: Junio C Hamano --- commit-reach.c | 3 ++- t/perf/p1500-graph-walks.sh | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index 58553dff05..349ca68a37 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -854,7 +854,8 @@ static enum contains_result contains_tag_algo(struct commit *candidate, int commit_contains(struct ref_filter *filter, struct commit *commit, struct commit_list *list, struct contains_cache *cache) { - if (filter->with_commit_tag_algo) + if (filter->with_commit_tag_algo || + generation_numbers_enabled(the_repository)) return contains_tag_algo(commit, list, cache) == CONTAINS_YES; return repo_is_descendant_of(the_repository, commit, list); } diff --git a/t/perf/p1500-graph-walks.sh b/t/perf/p1500-graph-walks.sh index 5b23ce5db9..d167b4f7e1 100755 --- a/t/perf/p1500-graph-walks.sh +++ b/t/perf/p1500-graph-walks.sh @@ -32,7 +32,16 @@ test_expect_success 'setup' ' echo "X:$line" >>test-tool-tags || return 1 done && - commit=$(git commit-tree $(git rev-parse HEAD^{tree})) && + git rev-list --first-parent --max-count=8192 HEAD >contains-commits && + test_file_not_empty contains-commits && + git update-ref refs/contains-perf-base "$(tail -n 1 contains-commits)" && + awk "{ + printf \"update refs/contains-perf/%04d %s\\n\", NR, \$1 + }" contains-commits | + git update-ref --stdin && + git pack-refs --include "refs/contains-perf/*" && + + commit=$(git commit-tree HEAD^{tree}) && git update-ref refs/heads/disjoint-base $commit && git commit-graph write --reachable @@ -62,6 +71,23 @@ test_perf 'contains: git tag --merged' ' xargs git tag --merged=HEAD /dev/null +' + test_perf 'is-base check: test-tool reach (refs)' ' test-tool reach get_branch_base_for_tip Date: Fri, 12 Jun 2026 17:49:14 -0400 Subject: [PATCH 3/3] commit-reach: die on contains walk errors Without generation numbers, repo_is_descendant_of() can return -1 when it cannot read commit ancestry. commit_contains() exposes that result through a Boolean interface, so ref-filter treats it as true. This can include a ref for --contains or exclude it for --no-contains without failing the command. Die when repo_is_descendant_of() reports an error. The memoized walk already dies when it cannot parse a commit, so callers of the non-memoized path no longer turn a failed walk into a match. Reported-by: Jeff King Signed-off-by: Tamir Duberstein Signed-off-by: Junio C Hamano --- commit-reach.c | 8 +++++++- t/t6301-for-each-ref-errors.sh | 22 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/commit-reach.c b/commit-reach.c index 349ca68a37..4acf1c02d5 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -854,10 +854,16 @@ static enum contains_result contains_tag_algo(struct commit *candidate, int commit_contains(struct ref_filter *filter, struct commit *commit, struct commit_list *list, struct contains_cache *cache) { + int result; + if (filter->with_commit_tag_algo || generation_numbers_enabled(the_repository)) return contains_tag_algo(commit, list, cache) == CONTAINS_YES; - return repo_is_descendant_of(the_repository, commit, list); + + result = repo_is_descendant_of(the_repository, commit, list); + if (result < 0) + die(_("failed to check reachability")); + return result; } int can_all_from_reach_with_flag(struct object_array *from, diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh index e06feb06e9..72b27c8be3 100755 --- a/t/t6301-for-each-ref-errors.sh +++ b/t/t6301-for-each-ref-errors.sh @@ -52,6 +52,28 @@ test_expect_success 'Missing objects are reported correctly' ' test_must_be_empty brief-err ' +test_expect_success 'missing ancestors are reported by contains filters' ' + test_when_finished "git update-ref -d refs/heads/missing-parent" && + { + echo "tree $(git rev-parse HEAD^{tree})" && + echo "parent $MISSING" && + git cat-file commit HEAD | + sed -n -e "/^author /p" -e "/^committer /p" && + echo && + echo "missing parent" + } >commit && + broken=$(git hash-object -t commit -w commit) && + git update-ref refs/heads/missing-parent "$broken" && + for option in --contains --no-contains + do + test_must_fail git for-each-ref "$option=HEAD" \ + refs/heads/missing-parent >out 2>err && + test_must_be_empty out && + test_grep "parse commit $MISSING" err || + return 1 + done +' + test_expect_success 'ahead-behind requires an argument' ' test_must_fail git for-each-ref \ --format="%(ahead-behind)" 2>err &&