Merge branch 'en/ort-harden-against-corrupt-trees' into jch

"ort" merge backend handles merging corrupt trees better by
aborting when it should.

* en/ort-harden-against-corrupt-trees:
  cache-tree: fix verify_cache() to catch non-adjacent D/F conflicts
  merge-ort: abort merge when trees have duplicate entries
  merge-ort: free diff pairs queue in clear_or_reinit_internal_opts()
  merge-ort: drop unnecessary show_all_errors from collect_merge_info()
  merge-ort: propagate callback errors from traverse_trees_wrapper()
This commit is contained in:
Junio C Hamano
2026-06-04 08:13:59 +09:00
6 changed files with 239 additions and 37 deletions

View File

@@ -193,22 +193,62 @@ static int verify_cache(struct index_state *istate, int flags)
for (i = 0; i + 1 < istate->cache_nr; i++) {
/* path/file always comes after path because of the way
* the cache is sorted. Also path can appear only once,
* which means conflicting one would immediately follow.
* so path/file is likely the immediately following path
* but might be separated if there is e.g. a
* path-internal/... file.
*/
const struct cache_entry *this_ce = istate->cache[i];
const struct cache_entry *next_ce = istate->cache[i + 1];
const char *this_name = this_ce->name;
const char *next_name = next_ce->name;
int this_len = ce_namelen(this_ce);
const char *conflict_name = NULL;
if (this_len < ce_namelen(next_ce) &&
next_name[this_len] == '/' &&
next_name[this_len] <= '/' &&
strncmp(this_name, next_name, this_len) == 0) {
if (next_name[this_len] == '/') {
conflict_name = next_name;
} else if (next_name[this_len] < '/') {
/*
* The immediately next entry shares our
* prefix but sorts before "path/" (e.g.,
* "path-internal" between "path" and
* "path/file", since '-' (0x2D) < '/'
* (0x2F)). Binary search to find where
* "path/" would be and check for a D/F
* conflict there.
*/
struct cache_entry *other;
struct strbuf probe = STRBUF_INIT;
int pos;
strbuf_add(&probe, this_name, this_len);
strbuf_addch(&probe, '/');
pos = index_name_pos_sparse(istate,
probe.buf,
probe.len);
strbuf_release(&probe);
if (pos < 0)
pos = -pos - 1;
if (pos >= (int)istate->cache_nr)
continue;
other = istate->cache[pos];
if (ce_namelen(other) > this_len &&
other->name[this_len] == '/' &&
!strncmp(this_name, other->name, this_len))
conflict_name = other->name;
}
}
if (conflict_name) {
if (10 < ++funny) {
fprintf(stderr, "...\n");
break;
}
fprintf(stderr, "You have both %s and %s\n",
this_name, next_name);
this_name, conflict_name);
}
}
if (funny)

View File

@@ -728,6 +728,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
strintmap_clear_func(&renames->deferred[i].possible_trivial_merges);
strset_clear_func(&renames->deferred[i].target_dirs);
renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
free(renames->pairs[i].queue);
diff_queue_init(&renames->pairs[i]);
}
renames->cached_pairs_valid_side = 0;
renames->dir_rename_mask = 0;
@@ -1008,32 +1010,34 @@ static int traverse_trees_wrapper(struct index_state *istate,
info->traverse_path = renames->callback_data_traverse_path;
info->fn = old_fn;
for (i = old_offset; i < renames->callback_data_nr; ++i) {
info->fn(n,
renames->callback_data[i].mask,
renames->callback_data[i].dirmask,
renames->callback_data[i].names,
info);
ret = info->fn(n,
renames->callback_data[i].mask,
renames->callback_data[i].dirmask,
renames->callback_data[i].names,
info);
if (ret < 0)
break;
}
renames->callback_data_nr = old_offset;
free(renames->callback_data_traverse_path);
renames->callback_data_traverse_path = old_callback_data_traverse_path;
info->traverse_path = NULL;
return 0;
return ret < 0 ? ret : 0;
}
static void setup_path_info(struct merge_options *opt,
struct string_list_item *result,
const char *current_dir_name,
int current_dir_name_len,
char *fullpath, /* we'll take over ownership */
struct name_entry *names,
struct name_entry *merged_version,
unsigned is_null, /* boolean */
unsigned df_conflict, /* boolean */
unsigned filemask,
unsigned dirmask,
int resolved /* boolean */)
static int setup_path_info(struct merge_options *opt,
struct string_list_item *result,
const char *current_dir_name,
int current_dir_name_len,
char *fullpath, /* we'll take over ownership */
struct name_entry *names,
struct name_entry *merged_version,
unsigned is_null, /* boolean */
unsigned df_conflict, /* boolean */
unsigned filemask,
unsigned dirmask,
int resolved /* boolean */)
{
/* result->util is void*, so mi is a convenience typed variable */
struct merged_info *mi;
@@ -1077,9 +1081,11 @@ static void setup_path_info(struct merge_options *opt,
*/
mi->is_null = 1;
}
strmap_put(&opt->priv->paths, fullpath, mi);
if (strmap_put(&opt->priv->paths, fullpath, mi))
return error(_("tree has duplicate entries for '%s'"), fullpath);
result->string = fullpath;
result->util = mi;
return 0;
}
static void add_pair(struct merge_options *opt,
@@ -1346,9 +1352,10 @@ static int collect_merge_info_callback(int n,
*/
if (side1_matches_mbase && side2_matches_mbase) {
/* mbase, side1, & side2 all match; use mbase as resolution */
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, names+0, mbase_null, 0 /* df_conflict */,
filemask, dirmask, 1 /* resolved */);
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, names+0, mbase_null, 0 /* df_conflict */,
filemask, dirmask, 1 /* resolved */))
return -1; /* Quit traversing */
return mask;
}
@@ -1360,9 +1367,10 @@ static int collect_merge_info_callback(int n,
*/
if (sides_match && filemask == 0x07) {
/* use side1 (== side2) version as resolution */
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, names+1, side1_null, 0,
filemask, dirmask, 1);
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, names+1, side1_null, 0,
filemask, dirmask, 1))
return -1; /* Quit traversing */
return mask;
}
@@ -1374,18 +1382,20 @@ static int collect_merge_info_callback(int n,
*/
if (side1_matches_mbase && filemask == 0x07) {
/* use side2 version as resolution */
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, names+2, side2_null, 0,
filemask, dirmask, 1);
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, names+2, side2_null, 0,
filemask, dirmask, 1))
return -1; /* Quit traversing */
return mask;
}
/* Similar to above but swapping sides 1 and 2 */
if (side2_matches_mbase && filemask == 0x07) {
/* use side1 version as resolution */
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, names+1, side1_null, 0,
filemask, dirmask, 1);
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, names+1, side1_null, 0,
filemask, dirmask, 1))
return -1; /* Quit traversing */
return mask;
}
@@ -1409,8 +1419,9 @@ static int collect_merge_info_callback(int n,
* unconflict some more cases, but that comes later so all we can
* do now is record the different non-null file hashes.)
*/
setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, NULL, 0, df_conflict, filemask, dirmask, 0);
if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
names, NULL, 0, df_conflict, filemask, dirmask, 0))
return -1; /* Quit traversing */
ci = pi.util;
VERIFY_CI(ci);
@@ -1738,7 +1749,6 @@ static int collect_merge_info(struct merge_options *opt,
setup_traverse_info(&info, opt->priv->toplevel_dir);
info.fn = collect_merge_info_callback;
info.data = opt;
info.show_all_errors = 1;
if (repo_parse_tree(opt->repo, merge_base) < 0 ||
repo_parse_tree(opt->repo, side1) < 0 ||

View File

@@ -125,6 +125,7 @@ integration_tests = [
't0090-cache-tree.sh',
't0091-bugreport.sh',
't0092-diagnose.sh',
't0093-verify-cache-df-gap.sh',
't0095-bloom.sh',
't0100-previous.sh',
't0101-at-syntax.sh',

View File

@@ -0,0 +1,38 @@
#!/usr/bin/perl
#
# Build a v2 index file from entries listed on stdin.
# Each line: "octalmode hex-oid name"
# Output: binary index written to stdout.
#
# This bypasses all D/F safety checks in add_index_entry(), simulating
# what happens when code uses ADD_CACHE_JUST_APPEND to bulk-load entries.
use strict;
use warnings;
use Digest::SHA qw(sha1 sha256);
my $hash_algo = $ENV{'GIT_DEFAULT_HASH'} || 'sha1';
my $hash_func = $hash_algo eq 'sha256' ? \&sha256 : \&sha1;
my @entries;
while (my $line = <STDIN>) {
chomp $line;
my ($mode, $oid_hex, $name) = split(/ /, $line, 3);
push @entries, [$mode, $oid_hex, $name];
}
my $body = "DIRC" . pack("NN", 2, scalar @entries);
for my $ent (@entries) {
my ($mode, $oid_hex, $name) = @{$ent};
# 10 x 32-bit stat fields (zeroed), with mode in position 7
my $stat = pack("N10", 0, 0, 0, 0, 0, 0, oct($mode), 0, 0, 0);
my $oid = pack("H*", $oid_hex);
my $flags = pack("n", length($name) & 0xFFF);
my $entry = $stat . $oid . $flags . $name . "\0";
# Pad to 8-byte boundary
while (length($entry) % 8) { $entry .= "\0"; }
$body .= $entry;
}
binmode STDOUT;
print $body . $hash_func->($body);

59
t/t0093-verify-cache-df-gap.sh Executable file
View File

@@ -0,0 +1,59 @@
#!/bin/sh
test_description='verify_cache() must catch non-adjacent D/F conflicts
Ensure that verify_cache() can complain about bad entries like:
docs <-- submodule
docs-internal/... <-- sorts here because "-" < "/"
docs/... <-- D/F conflict with "docs" above, not adjacent
In order to test verify_cache, we directly construct a corrupt index
(bypassing the D/F safety checks in add_index_entry) and verify that
write-tree rejects it.
'
. ./test-lib.sh
if ! test_have_prereq PERL
then
skip_all='skipping verify_cache D/F tests; Perl not available'
test_done
fi
# Build a v2 index from entries on stdin, bypassing D/F checks.
# Each line: "octalmode hex-oid name" (entries must be pre-sorted).
build_corrupt_index () {
perl "$TEST_DIRECTORY/t0093-direct-index-write.pl" >"$1"
}
test_expect_success 'setup objects' '
test_commit base &&
BLOB=$(git rev-parse HEAD:base.t) &&
SUB_COMMIT=$(git rev-parse HEAD)
'
test_expect_success 'adjacent D/F conflict is caught by verify_cache' '
cat >index-entries <<-EOF &&
0160000 $SUB_COMMIT docs
0100644 $BLOB docs/requirements.txt
EOF
build_corrupt_index .git/index <index-entries &&
test_must_fail git write-tree 2>err &&
test_grep "You have both docs and docs/requirements.txt" err
'
test_expect_success 'non-adjacent D/F conflict is caught by verify_cache' '
cat >index-entries <<-EOF &&
0160000 $SUB_COMMIT docs
0100644 $BLOB docs-internal/README.md
0100644 $BLOB docs/requirements.txt
EOF
build_corrupt_index .git/index <index-entries &&
test_must_fail git write-tree 2>err &&
test_grep "You have both docs and docs/requirements.txt" err
'
test_done

View File

@@ -1525,4 +1525,58 @@ test_expect_success 'submodule/directory preliminary conflict' '
)
'
# Testcase: submodule/directory conflict with duplicate tree entries
# One side has a path as a gitlink (submodule). The other side replaces
# the gitlink with a directory. A third-party tool creates a tree on the
# submodule side that has *both* a gitlink and a tree entry for the same
# path (adding a file inside the submodule path ignoring that there's a
# gitlink there). collect_merge_info_callback() should detect the
# duplicate and abort rather than silently corrupting its bookkeeping.
test_expect_success 'duplicate tree entries trigger an error' '
test_when_finished "rm -rf duplicate-entry" &&
git init duplicate-entry &&
(
cd duplicate-entry &&
# Base commit: "docs" is a gitlink (submodule)
empty_tree=$(git mktree </dev/null) &&
fake_commit=$(git commit-tree $empty_tree </dev/null) &&
git update-index --add --cacheinfo 160000,$fake_commit,docs &&
echo base >file.txt &&
git add file.txt &&
git commit -m base &&
# side1: remove the gitlink, replace with a directory
git checkout -b side1 &&
git rm --cached docs &&
mkdir -p docs &&
echo hello >docs/requirements.txt &&
git add docs/requirements.txt &&
git commit -m "side1: submodule to directory" &&
# side2: keep the gitlink but craft a tree that also
# contains a tree entry for "docs" (simulating a tool
# that adds files inside a submodule path without
# removing the gitlink first).
git checkout main &&
git checkout -b side2 &&
blob_oid=$(echo world | git hash-object -w --stdin) &&
docs_tree=$(printf "100644 blob %s\trequirements.txt\n" \
"$blob_oid" | git mktree) &&
cur_tree=$(git rev-parse HEAD^{tree}) &&
git cat-file -p $cur_tree >tree-listing &&
printf "040000 tree %s\tdocs\n" "$docs_tree" >>tree-listing &&
new_tree=$(git mktree <tree-listing) &&
side2_commit=$(git commit-tree $new_tree -p HEAD \
-m "side2: add file alongside submodule") &&
git update-ref refs/heads/side2 $side2_commit &&
# Merging must detect the duplicate and abort
git checkout side1 &&
test_must_fail git merge side2 2>err &&
test_grep "duplicate entries" err
)
'
test_done