mirror of
https://github.com/git-for-windows/git.git
synced 2026-06-11 19:16:58 -05:00
The path-walk API prunes trees and blobs when a sparse-checkout pattern list is provided, which is the correct behavior for 'git backfill --sparse' since it only needs to fill in objects at paths within the sparse cone. However, a future change will use the path-walk API with a sparse:<oid> filter that restricts only blobs while retaining all reachable trees. To support both behaviors, add a 'pl_sparse_trees' flag to path_walk_info. When set (as in 'git backfill --sparse' and the --stdin-pl test helper mode), the sparse patterns prune both trees and blobs. When unset, only blobs are filtered and all trees are walked and reported. Additionally, move the SEEN flag assignment in add_tree_entries() to after the sparse pattern and pathspec checks. Previously, SEEN was set immediately upon discovering an object, before checking whether its path matched the sparse patterns. When the same object ID appeared at multiple paths (e.g. sibling directories with identical contents), the first path to be visited would mark the object as SEEN. If that path was outside the sparse cone, the object would be skipped there but also never discovered at its in-cone path. By deferring the SEEN flag until after the checks pass, objects that are skipped due to sparse filtering remain discoverable at other paths where they may be in scope. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
189 lines
4.9 KiB
C
189 lines
4.9 KiB
C
/* We need this macro to access core_apply_sparse_checkout */
|
|
#define USE_THE_REPOSITORY_VARIABLE
|
|
|
|
#include "builtin.h"
|
|
#include "git-compat-util.h"
|
|
#include "config.h"
|
|
#include "parse-options.h"
|
|
#include "repository.h"
|
|
#include "commit.h"
|
|
#include "dir.h"
|
|
#include "environment.h"
|
|
#include "hex.h"
|
|
#include "tree.h"
|
|
#include "tree-walk.h"
|
|
#include "object.h"
|
|
#include "odb.h"
|
|
#include "oid-array.h"
|
|
#include "oidset.h"
|
|
#include "promisor-remote.h"
|
|
#include "strmap.h"
|
|
#include "string-list.h"
|
|
#include "revision.h"
|
|
#include "trace2.h"
|
|
#include "progress.h"
|
|
#include "packfile.h"
|
|
#include "path-walk.h"
|
|
|
|
static const char * const builtin_backfill_usage[] = {
|
|
N_("git backfill [--min-batch-size=<n>] [--[no-]sparse] [--[no-]include-edges] [<revision-range>]"),
|
|
NULL
|
|
};
|
|
|
|
struct backfill_context {
|
|
struct repository *repo;
|
|
struct oid_array current_batch;
|
|
size_t min_batch_size;
|
|
int sparse;
|
|
int include_edges;
|
|
struct rev_info revs;
|
|
};
|
|
|
|
static void backfill_context_clear(struct backfill_context *ctx)
|
|
{
|
|
oid_array_clear(&ctx->current_batch);
|
|
}
|
|
|
|
static void download_batch(struct backfill_context *ctx)
|
|
{
|
|
promisor_remote_get_direct(ctx->repo,
|
|
ctx->current_batch.oid,
|
|
ctx->current_batch.nr);
|
|
oid_array_clear(&ctx->current_batch);
|
|
|
|
/*
|
|
* We likely have a new packfile. Add it to the packed list to
|
|
* avoid possible duplicate downloads of the same objects.
|
|
*/
|
|
odb_reprepare(ctx->repo->objects);
|
|
}
|
|
|
|
static int fill_missing_blobs(const char *path UNUSED,
|
|
struct oid_array *list,
|
|
enum object_type type,
|
|
void *data)
|
|
{
|
|
struct backfill_context *ctx = data;
|
|
|
|
if (type != OBJ_BLOB)
|
|
return 0;
|
|
|
|
for (size_t i = 0; i < list->nr; i++) {
|
|
if (!odb_has_object(ctx->repo->objects, &list->oid[i], 0))
|
|
oid_array_append(&ctx->current_batch, &list->oid[i]);
|
|
}
|
|
|
|
if (ctx->current_batch.nr >= ctx->min_batch_size)
|
|
download_batch(ctx);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void reject_unsupported_rev_list_options(struct rev_info *revs)
|
|
{
|
|
if (revs->diffopt.pickaxe)
|
|
die(_("'%s' cannot be used with 'git backfill'"),
|
|
(revs->diffopt.pickaxe_opts & DIFF_PICKAXE_REGEX) ? "-G" : "-S");
|
|
if (revs->diffopt.filter || revs->diffopt.filter_not)
|
|
die(_("'%s' cannot be used with 'git backfill'"),
|
|
"--diff-filter");
|
|
if (revs->diffopt.flags.follow_renames)
|
|
die(_("'%s' cannot be used with 'git backfill'"),
|
|
"--follow");
|
|
if (revs->line_level_traverse)
|
|
die(_("'%s' cannot be used with 'git backfill'"),
|
|
"-L");
|
|
if (revs->explicit_diff_merges)
|
|
die(_("'%s' cannot be used with 'git backfill'"),
|
|
"--diff-merges");
|
|
if (!path_walk_filter_compatible(&revs->filter))
|
|
die(_("cannot backfill with these filter options"));
|
|
if (revs->filter.blob_limit_value)
|
|
die(_("cannot backfill with blob size limits"));
|
|
}
|
|
|
|
static int do_backfill(struct backfill_context *ctx)
|
|
{
|
|
struct path_walk_info info = PATH_WALK_INFO_INIT;
|
|
int ret;
|
|
|
|
if (ctx->sparse) {
|
|
CALLOC_ARRAY(info.pl, 1);
|
|
info.pl_sparse_trees = 1;
|
|
if (get_sparse_checkout_patterns(info.pl)) {
|
|
path_walk_info_clear(&info);
|
|
return error(_("problem loading sparse-checkout"));
|
|
}
|
|
}
|
|
|
|
/* Walk from HEAD if otherwise unspecified. */
|
|
if (!ctx->revs.pending.nr)
|
|
add_head_to_pending(&ctx->revs);
|
|
if (ctx->include_edges)
|
|
ctx->revs.edge_hint = 1;
|
|
|
|
info.blobs = 1;
|
|
info.tags = info.commits = info.trees = 0;
|
|
|
|
info.revs = &ctx->revs;
|
|
info.path_fn = fill_missing_blobs;
|
|
info.path_fn_data = ctx;
|
|
|
|
ret = walk_objects_by_path(&info);
|
|
|
|
/* Download the objects that did not fill a batch. */
|
|
if (!ret)
|
|
download_batch(ctx);
|
|
|
|
path_walk_info_clear(&info);
|
|
return ret;
|
|
}
|
|
|
|
int cmd_backfill(int argc, const char **argv, const char *prefix, struct repository *repo)
|
|
{
|
|
int result;
|
|
struct backfill_context ctx = {
|
|
.repo = repo,
|
|
.current_batch = OID_ARRAY_INIT,
|
|
.min_batch_size = 50000,
|
|
.sparse = -1,
|
|
.revs = REV_INFO_INIT,
|
|
.include_edges = 1,
|
|
};
|
|
struct option options[] = {
|
|
OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size,
|
|
N_("Minimum number of objects to request at a time")),
|
|
OPT_BOOL(0, "sparse", &ctx.sparse,
|
|
N_("Restrict the missing objects to the current sparse-checkout")),
|
|
OPT_BOOL(0, "include-edges", &ctx.include_edges,
|
|
N_("Include blobs from boundary commits in the backfill")),
|
|
OPT_END(),
|
|
};
|
|
struct repo_config_values *cfg = repo_config_values(the_repository);
|
|
|
|
show_usage_with_options_if_asked(argc, argv,
|
|
builtin_backfill_usage, options);
|
|
|
|
argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
|
|
PARSE_OPT_KEEP_UNKNOWN_OPT |
|
|
PARSE_OPT_KEEP_ARGV0 |
|
|
PARSE_OPT_KEEP_DASHDASH);
|
|
|
|
repo_init_revisions(repo, &ctx.revs, prefix);
|
|
argc = setup_revisions(argc, argv, &ctx.revs, NULL);
|
|
|
|
if (argc > 1)
|
|
die(_("unrecognized argument: %s"), argv[1]);
|
|
reject_unsupported_rev_list_options(&ctx.revs);
|
|
|
|
repo_config(repo, git_default_config, NULL);
|
|
|
|
if (ctx.sparse < 0)
|
|
ctx.sparse = cfg->apply_sparse_checkout;
|
|
|
|
result = do_backfill(&ctx);
|
|
backfill_context_clear(&ctx);
|
|
release_revisions(&ctx.revs);
|
|
return result;
|
|
}
|