From 3f5203eeb46e6e01c7848acde4bf5a62d0dd7d1b Mon Sep 17 00:00:00 2001
From: Tamir Duberstein
Date: Thu, 11 Jun 2026 21:31:51 -0700
Subject: [PATCH] ls-files: filter pathspec before lstat

In --deleted and --modified modes, show_files() calls lstat() for each
index entry before show_ce() applies the pathspec. prune_index() avoids
most of these calls for pathspecs with a common directory prefix, but
not for a top-level name or leading wildcard.

Match before lstat() to avoid accessing the worktree for entries that
cannot be shown. Treat this as a prefilter: do not update ps_matched,
and retain the match in show_ce() so --error-unmatch is satisfied only
by entries that the selected modes actually show.

Prefilter only a single pathspec item, bounding the added work for each
index entry. Applying match_pathspec() to multiple arguments can cost
more than the lstat() calls it avoids. In a synthetic repository with
10,000 clean files, passing every path to ls-files --modified increased
runtime from 112.5 ms to 494.1 ms when the prefilter was unconditional.

With $parent and $this exported as paths to binaries built from the
parent and this commit, on a repository with 881,290 index entries:

    hyperfine --warmup 0 --runs 3 \
      --command-name parent \
      '$parent -c core.fsmonitor=false ls-files --deleted -- README.md >/dev/null' \
      --command-name this-commit \
      '$this -c core.fsmonitor=false ls-files --deleted -- README.md >/dev/null'

reported means of 65.790 seconds for the parent and 4.987 seconds for
this commit.

Link: https://lore.kernel.org/r/xmqqfr2tnfk0.fsf@gitster.g
Helped-by: Jeff King
Signed-off-by: Tamir Duberstein
Signed-off-by: Junio C Hamano
---
 builtin/ls-files.c                  | 11 ++++++++++
 t/meson.build                       |  1 +
 t/perf/p3010-ls-files.sh            | 31 +++++++++++++++++++++++++++++
 t/t3010-ls-files-killed-modified.sh | 18 +++++++++++++++++
 4 files changed, 61 insertions(+)
 create mode 100755 t/perf/p3010-ls-files.sh

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 12d5d828ff..99e98fc187 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -453,6 +453,17 @@ static void show_files(struct repository *repo, struct dir_struct *dir)
 			continue;
 		if (ce_skip_worktree(ce))
 			continue;
+		/*
+		 * match_pathspec() is linear in pathspec.nr, so prefilter only
+		 * the single-pathspec case. Only entries shown by show_ce()
+		 * satisfy --error-unmatch.
+		 */
+		if (pathspec.nr == 1 &&
+		    !match_pathspec(repo->index, &pathspec, fullname.buf,
+				    fullname.len, max_prefix_len, NULL,
+				    S_ISDIR(ce->ce_mode) ||
+				    S_ISGITLINK(ce->ce_mode)))
+			continue;
 		stat_err = lstat(fullname.buf, &st);
 		if (stat_err && (errno != ENOENT && errno != ENOTDIR))
 			error_errno("cannot lstat '%s'", fullname.buf);
diff --git a/t/meson.build b/t/meson.build
index c5832fee05..3219264fe7 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -1141,6 +1141,7 @@ benchmarks = [
   'perf/p1500-graph-walks.sh',
   'perf/p1501-rev-parse-oneline.sh',
   'perf/p2000-sparse-operations.sh',
+  'perf/p3010-ls-files.sh',
   'perf/p3400-rebase.sh',
   'perf/p3404-rebase-interactive.sh',
   'perf/p4000-diff-algorithms.sh',
diff --git a/t/perf/p3010-ls-files.sh b/t/perf/p3010-ls-files.sh
new file mode 100755
index 0000000000..ae14449432
--- /dev/null
+++ b/t/perf/p3010-ls-files.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+test_description='Tests ls-files worktree performance'
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+test_expect_success 'select a zero-prefix pathspec' '
+	tracked_file=$(git ls-files | sed -n 1p) &&
+	test -n "$tracked_file" &&
+	pathspec="?${tracked_file#?}" &&
+	test_export pathspec
+'
+
+test_perf 'ls-files --deleted with pathspec' '
+	git -c core.fsmonitor=false ls-files --deleted \
+		-- "$pathspec" >/dev/null
+'
+
+test_perf 'ls-files --deleted with all-matching pathspec' '
+	git -c core.fsmonitor=false ls-files --deleted -- "*" >/dev/null
+'
+
+test_perf 'ls-files --modified with pathspec' '
+	git -c core.fsmonitor=false ls-files --modified \
+		-- "$pathspec" >/dev/null
+'
+
+test_done
diff --git a/t/t3010-ls-files-killed-modified.sh b/t/t3010-ls-files-killed-modified.sh
index 7af4532cd1..6e38e10219 100755
--- a/t/t3010-ls-files-killed-modified.sh
+++ b/t/t3010-ls-files-killed-modified.sh
@@ -124,4 +124,22 @@ test_expect_success 'validate git ls-files -m output.' '
 	test_cmp .expected .output
 '
 
+test_expect_success 'worktree modes honor wildcard pathspecs' '
+	cat >.expected <<-\EOF &&
+	path2/file2
+	path3/file3
+	EOF
+	git ls-files --deleted -- "path?/file?" >.output &&
+	test_cmp .expected .output &&
+
+	cat >.expected <<-\EOF &&
+	path7
+	path8
+	EOF
+	git ls-files --modified --error-unmatch -- "path[78]" >.output &&
+	test_cmp .expected .output &&
+
+	test_must_fail git ls-files --modified --error-unmatch -- path10
+'
+
 test_done