mirror of
https://github.com/git-for-windows/git.git
synced 2026-06-01 07:08:34 -05:00
One way to significantly reduce the cost of a Git clone and later fetches is to use a blobless partial clone and combine that with a sparse-checkout that reduces the paths that need to be populated in the working directory. Not only does this reduce the cost of clones and fetches, the sparse-checkout reduces the number of objects that need to be downloaded from a promisor remote. However, history investigations can be expensive as computing blob diffs will trigger promisor remote requests for one object at a time. This can be avoided by downloading the blobs needed for the given sparse-checkout using 'git backfill' and its new '--sparse' mode, at a time that the user is willing to pay that extra cost. Note that this is distinctly different from the '--filter=sparse:<oid>' option, as this assumes that the partial clone has all reachable trees and we are using client-side logic to avoid downloading blobs outside of the sparse-checkout cone. This avoids the server-side cost of walking trees while also achieving a similar goal. It also downloads in batches based on similar path names, presenting a resumable download if things are interrupted. This augments the path-walk API to have a possibly-NULL 'pl' member that may point to a 'struct pattern_list'. This could be more general than the sparse-checkout definition at HEAD, but 'git backfill --sparse' is currently the only consumer. Be sure to test this in both cone mode and not cone mode. Cone mode has the benefit that the path-walk can skip certain paths once they would expand beyond the sparse-checkout. Signed-off-by: Derrick Stolee <stolee@gmail.com>
134 lines
3.0 KiB
C
134 lines
3.0 KiB
C
#define USE_THE_REPOSITORY_VARIABLE
|
|
|
|
#include "test-tool.h"
|
|
#include "dir.h"
|
|
#include "environment.h"
|
|
#include "hex.h"
|
|
#include "object-name.h"
|
|
#include "object.h"
|
|
#include "pretty.h"
|
|
#include "revision.h"
|
|
#include "setup.h"
|
|
#include "parse-options.h"
|
|
#include "strbuf.h"
|
|
#include "path-walk.h"
|
|
#include "oid-array.h"
|
|
|
|
static const char * const path_walk_usage[] = {
|
|
N_("test-tool path-walk <options> -- <revision-options>"),
|
|
NULL
|
|
};
|
|
|
|
/*
 * Running totals accumulated by emit_block() over a whole walk and
 * printed by cmd__path_walk() once the walk has finished.
 */
struct path_walk_test_data {
	uintmax_t commit_nr;	/* commit objects reported so far */
	uintmax_t tree_nr;	/* tree objects reported so far */
	uintmax_t blob_nr;	/* blob objects reported so far */
	uintmax_t tag_nr;	/* tag objects reported so far */
};
|
|
|
|
static int emit_block(const char *path, struct oid_array *oids,
|
|
enum object_type type, void *data)
|
|
{
|
|
struct path_walk_test_data *tdata = data;
|
|
const char *typestr;
|
|
|
|
switch (type) {
|
|
case OBJ_COMMIT:
|
|
typestr = "COMMIT";
|
|
tdata->commit_nr += oids->nr;
|
|
break;
|
|
|
|
case OBJ_TREE:
|
|
typestr = "TREE";
|
|
tdata->tree_nr += oids->nr;
|
|
break;
|
|
|
|
case OBJ_BLOB:
|
|
typestr = "BLOB";
|
|
tdata->blob_nr += oids->nr;
|
|
break;
|
|
|
|
case OBJ_TAG:
|
|
typestr = "TAG";
|
|
tdata->tag_nr += oids->nr;
|
|
break;
|
|
|
|
default:
|
|
BUG("we do not understand this type");
|
|
}
|
|
|
|
for (size_t i = 0; i < oids->nr; i++) {
|
|
struct object *o = lookup_unknown_object(the_repository,
|
|
&oids->oid[i]);
|
|
printf("%s:%s:%s%s\n", typestr, path, oid_to_hex(&oids->oid[i]),
|
|
o->flags & UNINTERESTING ? ":UNINTERESTING" : "");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int cmd__path_walk(int argc, const char **argv)
|
|
{
|
|
int res, stdin_pl = 0;
|
|
struct rev_info revs = REV_INFO_INIT;
|
|
struct path_walk_info info = PATH_WALK_INFO_INIT;
|
|
struct path_walk_test_data data = { 0 };
|
|
struct option options[] = {
|
|
OPT_BOOL(0, "blobs", &info.blobs,
|
|
N_("toggle inclusion of blob objects")),
|
|
OPT_BOOL(0, "commits", &info.commits,
|
|
N_("toggle inclusion of commit objects")),
|
|
OPT_BOOL(0, "tags", &info.tags,
|
|
N_("toggle inclusion of tag objects")),
|
|
OPT_BOOL(0, "trees", &info.trees,
|
|
N_("toggle inclusion of tree objects")),
|
|
OPT_BOOL(0, "prune", &info.prune_all_uninteresting,
|
|
N_("toggle pruning of uninteresting paths")),
|
|
OPT_BOOL(0, "stdin-pl", &stdin_pl,
|
|
N_("read a pattern list over stdin")),
|
|
OPT_END(),
|
|
};
|
|
|
|
setup_git_directory();
|
|
revs.repo = the_repository;
|
|
|
|
argc = parse_options(argc, argv, NULL,
|
|
options, path_walk_usage,
|
|
PARSE_OPT_KEEP_UNKNOWN_OPT | PARSE_OPT_KEEP_ARGV0);
|
|
|
|
if (argc > 1)
|
|
setup_revisions(argc, argv, &revs, NULL);
|
|
else
|
|
usage(path_walk_usage[0]);
|
|
|
|
info.revs = &revs;
|
|
info.path_fn = emit_block;
|
|
info.path_fn_data = &data;
|
|
|
|
if (stdin_pl) {
|
|
struct strbuf in = STRBUF_INIT;
|
|
CALLOC_ARRAY(info.pl, 1);
|
|
|
|
info.pl->use_cone_patterns = 1;
|
|
|
|
strbuf_fread(&in, 2048, stdin);
|
|
add_patterns_from_buffer(in.buf, in.len, "", 0, info.pl);
|
|
strbuf_release(&in);
|
|
}
|
|
|
|
res = walk_objects_by_path(&info);
|
|
|
|
printf("commits:%" PRIuMAX "\n"
|
|
"trees:%" PRIuMAX "\n"
|
|
"blobs:%" PRIuMAX "\n"
|
|
"tags:%" PRIuMAX "\n",
|
|
data.commit_nr, data.tree_nr, data.blob_nr, data.tag_nr);
|
|
|
|
if (info.pl) {
|
|
clear_pattern_list(info.pl);
|
|
free(info.pl);
|
|
}
|
|
release_revisions(&revs);
|
|
return res;
|
|
}
|