Merge branch 'ds/backfill-revs'

`git backfill` learned to accept revision and pathspec arguments.

* ds/backfill-revs:
  t5620: test backfill's unknown argument handling
  path-walk: support wildcard pathspecs for blob filtering
  backfill: work with prefix pathspecs
  backfill: accept revision arguments
  t5620: prepare branched repo for revision tests
  revision: include object-name.h
This commit is contained in:
Junio C Hamano
2026-04-03 13:01:08 -07:00
7 changed files with 280 additions and 10 deletions

View File

@@ -63,9 +63,12 @@ OPTIONS
current sparse-checkout. If the sparse-checkout feature is enabled,
then `--sparse` is assumed and can be disabled with `--no-sparse`.
You may also specify the commit limiting options from linkgit:git-rev-list[1].
SEE ALSO
--------
linkgit:git-clone[1].
linkgit:git-clone[1],
linkgit:git-rev-list[1]
GIT
---

View File

@@ -35,6 +35,7 @@ struct backfill_context {
struct oid_array current_batch;
size_t min_batch_size;
int sparse;
struct rev_info revs;
};
static void backfill_context_clear(struct backfill_context *ctx)
@@ -79,7 +80,6 @@ static int fill_missing_blobs(const char *path UNUSED,
static int do_backfill(struct backfill_context *ctx)
{
struct rev_info revs;
struct path_walk_info info = PATH_WALK_INFO_INIT;
int ret;
@@ -91,13 +91,14 @@ static int do_backfill(struct backfill_context *ctx)
}
}
repo_init_revisions(ctx->repo, &revs, "");
handle_revision_arg("HEAD", &revs, 0, 0);
/* Walk from HEAD if otherwise unspecified. */
if (!ctx->revs.pending.nr)
add_head_to_pending(&ctx->revs);
info.blobs = 1;
info.tags = info.commits = info.trees = 0;
info.revs = &revs;
info.revs = &ctx->revs;
info.path_fn = fill_missing_blobs;
info.path_fn_data = ctx;
@@ -108,7 +109,6 @@ static int do_backfill(struct backfill_context *ctx)
download_batch(ctx);
path_walk_info_clear(&info);
release_revisions(&revs);
return ret;
}
@@ -120,6 +120,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
.current_batch = OID_ARRAY_INIT,
.min_batch_size = 50000,
.sparse = 0,
.revs = REV_INFO_INIT,
};
struct option options[] = {
OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size,
@@ -134,7 +135,15 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
builtin_backfill_usage, options);
argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
0);
PARSE_OPT_KEEP_UNKNOWN_OPT |
PARSE_OPT_KEEP_ARGV0 |
PARSE_OPT_KEEP_DASHDASH);
repo_init_revisions(repo, &ctx.revs, prefix);
argc = setup_revisions(argc, argv, &ctx.revs, NULL);
if (argc > 1)
die(_("unrecognized argument: %s"), argv[1]);
repo_config(repo, git_default_config, NULL);
@@ -143,5 +152,6 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit
result = do_backfill(&ctx);
backfill_context_clear(&ctx);
release_revisions(&ctx.revs);
return result;
}

View File

@@ -11,6 +11,7 @@
#include "list-objects.h"
#include "object.h"
#include "oid-array.h"
#include "path.h"
#include "prio-queue.h"
#include "repository.h"
#include "revision.h"
@@ -62,6 +63,8 @@ struct path_walk_context {
*/
struct prio_queue path_stack;
struct strset path_stack_pushed;
unsigned exact_pathspecs:1;
};
static int compare_by_type(const void *one, const void *two, void *cb_data)
@@ -206,6 +209,33 @@ static int add_tree_entries(struct path_walk_context *ctx,
match != MATCHED)
continue;
}
if (ctx->revs->prune_data.nr && ctx->exact_pathspecs) {
struct pathspec *pd = &ctx->revs->prune_data;
bool found = false;
int did_strip_suffix = strbuf_strip_suffix(&path, "/");
for (int i = 0; i < pd->nr; i++) {
struct pathspec_item *item = &pd->items[i];
/*
* Keep this path (do not skip it) if either the
* path or the pathspec is a directory prefix
* of the other.
*/
if (dir_prefix(path.buf, item->match) ||
dir_prefix(item->match, path.buf)) {
found = true;
break;
}
}
if (did_strip_suffix)
strbuf_addch(&path, '/');
/* Skip paths that do not match the prefix. */
if (!found)
continue;
}
add_path_to_list(ctx, path.buf, type, &entry.oid,
!(o->flags & UNINTERESTING));
@@ -274,6 +304,13 @@ static int walk_path(struct path_walk_context *ctx,
return 0;
}
if (list->type == OBJ_BLOB &&
ctx->revs->prune_data.nr &&
!match_pathspec(ctx->repo->index, &ctx->revs->prune_data,
path, strlen(path), 0,
NULL, 0))
return 0;
/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
@@ -481,6 +518,12 @@ int walk_objects_by_path(struct path_walk_info *info)
if (info->tags)
info->revs->tag_objects = 1;
if (ctx.revs->prune_data.nr) {
if (!ctx.revs->prune_data.has_wildcard &&
!ctx.revs->prune_data.magic)
ctx.exact_pathspecs = 1;
}
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;

2
path.c
View File

@@ -56,7 +56,7 @@ static void strbuf_cleanup_path(struct strbuf *sb)
strbuf_remove(sb, 0, path - sb->buf);
}
static int dir_prefix(const char *buf, const char *dir)
int dir_prefix(const char *buf, const char *dir)
{
size_t len = strlen(dir);
return !strncmp(buf, dir, len) &&

6
path.h
View File

@@ -112,6 +112,12 @@ const char *repo_submodule_path_replace(struct repository *repo,
const char *fmt, ...)
__attribute__((format (printf, 4, 5)));
/*
* Given a directory name 'dir' (not ending with a trailing '/'),
* determine if 'buf' is equal to 'dir' or has prefix 'dir'+'/'.
*/
int dir_prefix(const char *buf, const char *dir);
void report_linked_checkout_garbage(struct repository *r);
/*

View File

@@ -4,6 +4,7 @@
#include "commit.h"
#include "grep.h"
#include "notes.h"
#include "object-name.h"
#include "oidset.h"
#include "pretty.h"
#include "diff.h"

View File

@@ -7,6 +7,14 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
. ./test-lib.sh
# Two failure modes are checked. A bare unknown word must fail with an
# "ambiguous argument" message, and an unknown dashed option placed among
# otherwise valid revision options must fail with the
# "unrecognized argument: ..." message naming that option.
test_expect_success 'backfill rejects unexpected arguments' '
test_must_fail git backfill unexpected-arg 2>err &&
test_grep "ambiguous argument .*unexpected-arg" err &&
test_must_fail git backfill --all --unexpected-arg --first-parent 2>err &&
test_grep "unrecognized argument: --unexpected-arg" err
'
# We create objects in the 'src' repo.
test_expect_success 'setup repo for object creation' '
echo "{print \$1}" >print_1.awk &&
@@ -15,7 +23,7 @@ test_expect_success 'setup repo for object creation' '
git init src &&
mkdir -p src/a/b/c &&
mkdir -p src/d/e &&
mkdir -p src/d/f &&
for i in 1 2
do
@@ -26,8 +34,9 @@ test_expect_success 'setup repo for object creation' '
echo "Version $i of file a/b/$n" > src/a/b/file.$n.txt &&
echo "Version $i of file a/b/c/$n" > src/a/b/c/file.$n.txt &&
echo "Version $i of file d/$n" > src/d/file.$n.txt &&
echo "Version $i of file d/e/$n" > src/d/e/file.$n.txt &&
echo "Version $i of file d/f/$n" > src/d/f/file.$n.txt &&
git -C src add . &&
test_tick &&
git -C src commit -m "Iteration $n" || return 1
done
done
@@ -41,6 +50,53 @@ test_expect_success 'setup bare clone for server' '
git -C srv.bare config --local uploadpack.allowanysha1inwant 1
'
# Create a version of the repo with branches for testing revision
# arguments like --all, --first-parent, and --since.
#
# main: 8 commits (linear) + merge of side branch
# 48 original blobs + 4 side blobs = 52 blobs from main HEAD
# side: 2 commits adding s/file.{1,2}.txt (v1, v2), merged into main
# other: 1 commit adding o/file.{1,2}.txt (not merged)
# 54 total blobs reachable from --all
test_expect_success 'setup branched repo for revision tests' '
git clone src src-revs &&
# Side branch from tip of main with unique files
git -C src-revs checkout -b side HEAD &&
mkdir -p src-revs/s &&
echo "Side version 1 of file 1" >src-revs/s/file.1.txt &&
echo "Side version 1 of file 2" >src-revs/s/file.2.txt &&
test_tick &&
git -C src-revs add . &&
git -C src-revs commit -m "Side commit 1" &&
# Second side commit rewrites both files, so the version 1 blobs
# are referenced by "Side commit 1" only.
echo "Side version 2 of file 1" >src-revs/s/file.1.txt &&
echo "Side version 2 of file 2" >src-revs/s/file.2.txt &&
test_tick &&
git -C src-revs add . &&
git -C src-revs commit -m "Side commit 2" &&
# Merge side into main
# (--no-ff forces a real merge commit even though main has not moved)
git -C src-revs checkout main &&
test_tick &&
git -C src-revs merge side --no-ff -m "Merge side branch" &&
# Other branch (not merged) for --all testing
# main~1 is the first parent of the merge commit, i.e. the tip of
# main before side was merged.
git -C src-revs checkout -b other main~1 &&
mkdir -p src-revs/o &&
echo "Other content 1" >src-revs/o/file.1.txt &&
echo "Other content 2" >src-revs/o/file.2.txt &&
test_tick &&
git -C src-revs add . &&
git -C src-revs commit -m "Other commit" &&
git -C src-revs checkout main &&
# Bare copy that the partial clones below fetch from; it must allow
# filtered fetches and requests for arbitrary object ids.
git clone --bare "file://$(pwd)/src-revs" srv-revs.bare &&
git -C srv-revs.bare config --local uploadpack.allowfilter 1 &&
git -C srv-revs.bare config --local uploadpack.allowanysha1inwant 1
'
# do basic partial clone from "srv.bare"
test_expect_success 'do partial clone 1, backfill gets all objects' '
git clone --no-checkout --filter=blob:none \
@@ -176,6 +232,157 @@ test_expect_success 'backfill --sparse without cone mode (negative)' '
test_line_count = 12 missing
'
# A revision range on the command line limits the walk: starting from a
# blobless clone with 48 missing blobs, backfilling HEAD~2..HEAD is
# expected to fetch 30 of them and leave 18 missing.
test_expect_success 'backfill with revision range' '
test_when_finished rm -rf backfill-revs &&
git clone --no-checkout --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv.bare" backfill-revs &&
# No blobs yet
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&
git -C backfill-revs backfill HEAD~2..HEAD &&
# 30 objects downloaded.
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 18 missing
'
# Same walk as the revision-range test, but the revisions are supplied
# through --stdin as "HEAD" plus the exclusion "^HEAD~2". The expected
# before/after counts (48 -> 18 missing) are identical.
test_expect_success 'backfill with revisions over stdin' '
test_when_finished rm -rf backfill-revs &&
git clone --no-checkout --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv.bare" backfill-revs &&
# No blobs yet
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&
cat >in <<-EOF &&
HEAD
^HEAD~2
EOF
git -C backfill-revs backfill --stdin <in &&
# 30 objects downloaded.
git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 18 missing
'
# A literal directory pathspec restricts which blobs are fetched. With
# "d/f" the missing count must drop from 48 to 40, i.e. exactly 8 blobs
# are downloaded (presumably the 4 files x 2 versions under d/f), and
# nothing may be written to stderr.
test_expect_success 'backfill with prefix pathspec' '
test_when_finished rm -rf backfill-path &&
git clone --bare --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv.bare" backfill-path &&
# No blobs yet
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&
git -C backfill-path backfill HEAD -- d/f 2>err &&
test_must_be_empty err &&
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 40 missing
'
# Several literal pathspecs are combined: "d/f" and "a" together must
# bring the missing count from 48 down to 16, i.e. 32 blobs downloaded
# (presumably 8 under d/f plus 24 under a, a/b and a/b/c), with no
# output on stderr.
test_expect_success 'backfill with multiple pathspecs' '
test_when_finished rm -rf backfill-path &&
git clone --bare --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv.bare" backfill-path &&
# No blobs yet
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&
git -C backfill-path backfill HEAD -- d/f a 2>err &&
test_must_be_empty err &&
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 16 missing
'
# A wildcard pathspec must also filter blobs. The pattern is quoted so
# the shell passes it to git unexpanded. Only 8 blobs are expected to be
# downloaded (48 -> 40 missing); presumably these are the files directly
# in d/, since the blobs under d/f/ would otherwise double that number.
test_expect_success 'backfill with wildcard pathspec' '
test_when_finished rm -rf backfill-path &&
git clone --bare --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv.bare" backfill-path &&
# No blobs yet
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 48 missing &&
git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err &&
test_must_be_empty err &&
git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 40 missing
'
# Uses the branched server repo (srv-revs.bare), cloned without
# --single-branch so that every branch is available. Backfilling HEAD
# must leave only the 2 blobs unique to the unmerged branch missing;
# a second run with --all must then fetch those as well.
test_expect_success 'backfill with --all' '
test_when_finished rm -rf backfill-all &&
git clone --no-checkout --filter=blob:none \
"file://$(pwd)/srv-revs.bare" backfill-all &&
# All blobs from all refs are missing
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
test_line_count = 54 missing &&
# Backfill from HEAD gets main blobs only
git -C backfill-all backfill HEAD &&
# Other branch blobs still missing
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
test_line_count = 2 missing &&
# Backfill with --all gets everything
git -C backfill-all backfill --all &&
git -C backfill-all rev-list --quiet --objects --all --missing=print >missing &&
test_line_count = 0 missing
'
# The tip of main in srv-revs.bare is a merge of the side branch. With
# --first-parent the walk does not descend into the side commits, so
# blobs that appear only in a side commit (and not in the merge result)
# must remain missing after the backfill.
test_expect_success 'backfill with --first-parent' '
test_when_finished rm -rf backfill-fp &&
git clone --no-checkout --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv-revs.bare" backfill-fp &&
git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 52 missing &&
# --first-parent skips the side branch commits, so
# s/file.{1,2}.txt v1 blobs (only in side commit 1) are missed.
git -C backfill-fp backfill --first-parent HEAD &&
git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 2 missing
'
# Checks that a date-based commit limit is honoured. SINCE is the
# committer timestamp (%ct) of the 5th commit, oldest first, on the
# first-parent history of HEAD~1. The cutoff passed to --since is one
# second earlier, so that the 5th commit itself falls inside the window
# while commits 1-4 fall outside it.
test_expect_success 'backfill with --since' '
test_when_finished rm -rf backfill-since &&
git clone --no-checkout --filter=blob:none \
--single-branch --branch=main \
"file://$(pwd)/srv-revs.bare" backfill-since &&
git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 52 missing &&
# Use a cutoff between commits 4 and 5 (between v1 and v2
# iterations). Commits 5-8 still carry v1 of files 2-4 in
# their trees, but v1 of file.1.txt is only in commits 1-4.
SINCE=$(git -C backfill-since log --first-parent --reverse \
--format=%ct HEAD~1 | sed -n 5p) &&
git -C backfill-since backfill --since="@$((SINCE - 1))" HEAD &&
# 6 missing: v1 of file.1.txt in all 6 directories
git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing &&
test_line_count = 6 missing
'
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd