diff --git a/cache-tree.c b/cache-tree.c index 7881b42aa2..f11844fe72 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -192,22 +192,62 @@ static int verify_cache(struct index_state *istate, int flags) for (i = 0; i + 1 < istate->cache_nr; i++) { /* path/file always comes after path because of the way * the cache is sorted. Also path can appear only once, - * which means conflicting one would immediately follow. + * so path/file is likely the immediately following path + * but might be separated if there is e.g. a + * path-internal/... file. */ const struct cache_entry *this_ce = istate->cache[i]; const struct cache_entry *next_ce = istate->cache[i + 1]; const char *this_name = this_ce->name; const char *next_name = next_ce->name; int this_len = ce_namelen(this_ce); + const char *conflict_name = NULL; + if (this_len < ce_namelen(next_ce) && - next_name[this_len] == '/' && + next_name[this_len] <= '/' && strncmp(this_name, next_name, this_len) == 0) { + if (next_name[this_len] == '/') { + conflict_name = next_name; + } else if (next_name[this_len] < '/') { + /* + * The immediately next entry shares our + * prefix but sorts before "path/" (e.g., + * "path-internal" between "path" and + * "path/file", since '-' (0x2D) < '/' + * (0x2F)). Binary search to find where + * "path/" would be and check for a D/F + * conflict there. + */ + struct cache_entry *other; + struct strbuf probe = STRBUF_INIT; + int pos; + + strbuf_add(&probe, this_name, this_len); + strbuf_addch(&probe, '/'); + pos = index_name_pos_sparse(istate, + probe.buf, + probe.len); + strbuf_release(&probe); + + if (pos < 0) + pos = -pos - 1; + if (pos >= (int)istate->cache_nr) + continue; + other = istate->cache[pos]; + if (ce_namelen(other) > this_len && + other->name[this_len] == '/' && + !strncmp(this_name, other->name, this_len)) + conflict_name = other->name; + } + } + + if (conflict_name) { if (10 < ++funny) { fprintf(stderr, "...\n"); break; } fprintf(stderr, "You have both %s and %s\n", - this_name, next_name); + this_name, conflict_name); } } if (funny) diff --git a/t/meson.build b/t/meson.build index 7528e5cda5..362177999b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -124,6 +124,7 @@ integration_tests = [ 't0090-cache-tree.sh', 't0091-bugreport.sh', 't0092-diagnose.sh', + 't0093-verify-cache-df-gap.sh', 't0095-bloom.sh', 't0100-previous.sh', 't0101-at-syntax.sh', diff --git a/t/t0093-direct-index-write.pl b/t/t0093-direct-index-write.pl new file mode 100644 index 0000000000..2881a3ebb2 --- /dev/null +++ b/t/t0093-direct-index-write.pl @@ -0,0 +1,38 @@ +#!/usr/bin/perl +# +# Build a v2 index file from entries listed on stdin. +# Each line: "octalmode hex-oid name" +# Output: binary index written to stdout. +# +# This bypasses all D/F safety checks in add_index_entry(), simulating +# what happens when code uses ADD_CACHE_JUST_APPEND to bulk-load entries. +use strict; +use warnings; +use Digest::SHA qw(sha1 sha256); + +my $hash_algo = $ENV{'GIT_DEFAULT_HASH'} || 'sha1'; +my $hash_func = $hash_algo eq 'sha256' ? \&sha256 : \&sha1; + +my @entries; +while (my $line = ) { + chomp $line; + my ($mode, $oid_hex, $name) = split(/ /, $line, 3); + push @entries, [$mode, $oid_hex, $name]; +} + +my $body = "DIRC" . pack("NN", 2, scalar @entries); + +for my $ent (@entries) { + my ($mode, $oid_hex, $name) = @{$ent}; + # 10 x 32-bit stat fields (zeroed), with mode in position 7 + my $stat = pack("N10", 0, 0, 0, 0, 0, 0, oct($mode), 0, 0, 0); + my $oid = pack("H*", $oid_hex); + my $flags = pack("n", length($name) & 0xFFF); + my $entry = $stat . $oid . $flags . $name . "\0"; + # Pad to 8-byte boundary + while (length($entry) % 8) { $entry .= "\0"; } + $body .= $entry; +} + +binmode STDOUT; +print $body . $hash_func->($body); diff --git a/t/t0093-verify-cache-df-gap.sh b/t/t0093-verify-cache-df-gap.sh new file mode 100755 index 0000000000..0b6829d805 --- /dev/null +++ b/t/t0093-verify-cache-df-gap.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +test_description='verify_cache() must catch non-adjacent D/F conflicts + +Ensure that verify_cache() can complain about bad entries like: + + docs <-- submodule + docs-internal/... <-- sorts here because "-" < "/" + docs/... <-- D/F conflict with "docs" above, not adjacent + +In order to test verify_cache, we directly construct a corrupt index +(bypassing the D/F safety checks in add_index_entry) and verify that +write-tree rejects it. +' + +. ./test-lib.sh + +if ! test_have_prereq PERL +then + skip_all='skipping verify_cache D/F tests; Perl not available' + test_done +fi + +# Build a v2 index from entries on stdin, bypassing D/F checks. +# Each line: "octalmode hex-oid name" (entries must be pre-sorted). +build_corrupt_index () { + perl "$TEST_DIRECTORY/t0093-direct-index-write.pl" >"$1" +} + +test_expect_success 'setup objects' ' + test_commit base && + BLOB=$(git rev-parse HEAD:base.t) && + SUB_COMMIT=$(git rev-parse HEAD) +' + +test_expect_success 'adjacent D/F conflict is caught by verify_cache' ' + cat >index-entries <<-EOF && + 0160000 $SUB_COMMIT docs + 0100644 $BLOB docs/requirements.txt + EOF + build_corrupt_index .git/index err && + test_grep "You have both docs and docs/requirements.txt" err +' + +test_expect_success 'non-adjacent D/F conflict is caught by verify_cache' ' + cat >index-entries <<-EOF && + 0160000 $SUB_COMMIT docs + 0100644 $BLOB docs-internal/README.md + 0100644 $BLOB docs/requirements.txt + EOF + build_corrupt_index .git/index err && + test_grep "You have both docs and docs/requirements.txt" err +' + +test_done