From 04c9c5e8d2d99050d260149cad9dde1302a02ff4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 24 Mar 2026 07:18:26 +0100 Subject: [PATCH] commit-graph: fix writing generations with dates exceeding 34 bits The `timestamp_t` type is declared as `uintmax_t` and thus typically has 64 bits of precision. Usually, the full precision of such dates is not required: it would be comforting to know that Git is still around in millions of years, but all in all the chance is rather low. We abuse this fact in the commit-graph: instead of storing the full 64 bits of precision, committer dates only store 34 bits. This is still plenty of headroom, as it means that we can represent dates until year 2514. Commits which are dated beyond that year will simply get a date whose remaining bits are masked. The result of this is somewhat curious: the committer date will be different depending on whether a commit gets parsed via the commit-graph or via the object database. This isn't really too much of an issue in general though, as we don't typically use the date parsed from the commit-graph in user-facing output. But with 024b4c9697 (commit: make `repo_parse_commit_no_graph()` more robust, 2026-02-16) it started to become a problem when writing the commit-graph itself. This commit changed `repo_parse_commit_no_graph()` so that we re-parse the commit via the object database in case it was already parsed beforehand via the commit-graph. The consequence is that we may now act with two different commit dates at different stages: - Initially, we use the 34-bit precision timestamp when writing the chunk generation data. We thus correctly compute the offsets relative to the on-disk timestamp here. - Later, when writing the overflow data, we may end up with the full-precision timestamp. When the date is larger than 34 bits the result of this is an underflow when computing the offset. This causes a mismatch in the number of generation data overflow records we want to write, and that ultimately causes Git to die. Introduce a new helper function that computes the generation offset for a commit while correctly masking the date to 34 bits. This makes the previously-implicit assumptions about the commit date precision explicit and thus hopefully less fragile going forward. Adapt sites that compute the offset to use the function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit-graph.c | 37 ++++++++++++++++++++++++++++++++++--- t/t5318-commit-graph.sh | 20 ++++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 6b1f02e179..ad3582451d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1319,6 +1319,37 @@ static int write_graph_chunk_data(struct hashfile *f, return 0; } +/* + * Compute the generation offset between the commit date and its generation. + * This is what's ultimately stored as generation number in the commit graph. + * + * Note that the computation of the commit date is more involved than you might + * think. Instead of using the full commit date, we're in fact masking bits so + * that only the 34 lowest bits are considered. This results from the fact that + * commit graphs themselves only ever store 34 bits of the commit date + * themselves. + * + * This means that if we have a commit date that exceeds 34 bits we'll end up + * in situations where depending on whether the commit has been parsed from the + * object database or the commit graph we'll have different dates, where the + * ones parsed from the object database would have full 64 bit precision. + * + * But ultimately, we only ever want the offset to be relative to what we + * actually end up storing on disk, and hence we have to mask all the other + * bits. + */ +static timestamp_t compute_generation_offset(struct commit *c) +{ + timestamp_t masked_date; + + if (sizeof(timestamp_t) > 4) + masked_date = c->date & (((timestamp_t) 1 << 34) - 1); + else + masked_date = c->date; + + return commit_graph_data_at(c)->generation - masked_date; +} + static int write_graph_chunk_generation_data(struct hashfile *f, void *data) { @@ -1329,7 +1360,7 @@ static int write_graph_chunk_generation_data(struct hashfile *f, struct commit *c = ctx->commits.items[i]; timestamp_t offset; repo_parse_commit(ctx->r, c); - offset = commit_graph_data_at(c)->generation - c->date; + offset = compute_generation_offset(c); display_progress(ctx->progress, ++ctx->progress_cnt); if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { @@ -1350,7 +1381,7 @@ static int write_graph_chunk_generation_data_overflow(struct hashfile *f, int i; for (i = 0; i < ctx->commits.nr; i++) { struct commit *c = ctx->commits.items[i]; - timestamp_t offset = commit_graph_data_at(c)->generation - c->date; + timestamp_t offset = compute_generation_offset(c); display_progress(ctx->progress, ++ctx->progress_cnt); if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) { @@ -1733,7 +1764,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) for (i = 0; i < ctx->commits.nr; i++) { struct commit *c = ctx->commits.items[i]; - timestamp_t offset = commit_graph_data_at(c)->generation - c->date; + timestamp_t offset = compute_generation_offset(c); if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) ctx->num_generation_data_overflows++; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 98c6910963..1c40f904f8 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -417,6 +417,26 @@ test_expect_success TIME_IS_64BIT,TIME_T_IS_64BIT 'lower layers have overflow ch test_cmp full/.git/objects/info/commit-graph commit-graph-upgraded ' +test_expect_success TIME_IS_64BIT,TIME_T_IS_64BIT 'overflow chunk when replacing commit-graph' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + cat >commit <<-EOF && + tree $(test_oid empty_tree) + author Example 9223372036854775 +0000 + committer Example 9223372036854775 +0000 + + Weird commit date + EOF + commit_id=$(git hash-object -t commit -w commit) && + git reset --hard "$commit_id" && + git commit-graph write --reachable && + git commit-graph write --reachable --split=replace && + git log + ) +' + # the verify tests below expect the commit-graph to contain # exactly the commits reachable from the commits/8 branch. # If the file changes the set of commits in the list, then the