mirror of
https://github.com/git-for-windows/git.git
synced 2025-12-11 19:22:58 -06:00
In the presence of many tags, the use of oid_array_lookup() can become extremely slow. We should rely upon the SEEN bit instead. This affects the tag-peeling walk as well as the switch statement for adding the peeled object to the correct oid_array. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
426 lines
11 KiB
C
426 lines
11 KiB
C
/*
 * path-walk.c: implementation for path-based walks of the object graph.
 */
|
|
#include "git-compat-util.h"
|
|
#include "path-walk.h"
|
|
#include "blob.h"
|
|
#include "commit.h"
|
|
#include "dir.h"
|
|
#include "hashmap.h"
|
|
#include "hex.h"
|
|
#include "object.h"
|
|
#include "oid-array.h"
|
|
#include "repository.h"
|
|
#include "revision.h"
|
|
#include "string-list.h"
|
|
#include "strmap.h"
|
|
#include "tag.h"
|
|
#include "trace2.h"
|
|
#include "tree.h"
|
|
#include "tree-walk.h"
|
|
|
|
struct type_and_oid_list
|
|
{
|
|
enum object_type type;
|
|
struct oid_array oids;
|
|
int maybe_interesting;
|
|
};
|
|
|
|
/*
 * Static initializer for an empty 'struct type_and_oid_list'; members
 * not named here (such as 'maybe_interesting') are zero-initialized.
 */
#define TYPE_AND_OID_LIST_INIT { .type = OBJ_NONE, .oids = OID_ARRAY_INIT }
|
|
|
|
struct path_walk_context {
|
|
/**
|
|
* Repeats of data in 'struct path_walk_info' for
|
|
* access with fewer characters.
|
|
*/
|
|
struct repository *repo;
|
|
struct rev_info *revs;
|
|
struct path_walk_info *info;
|
|
|
|
/**
|
|
* Map a path to a 'struct type_and_oid_list'
|
|
* containing the objects discovered at that
|
|
* path.
|
|
*/
|
|
struct strmap paths_to_lists;
|
|
|
|
/**
|
|
* Store the current list of paths in a stack, to
|
|
* facilitate depth-first-search without recursion.
|
|
*/
|
|
struct string_list path_stack;
|
|
};
|
|
|
|
static int add_children(struct path_walk_context *ctx,
|
|
const char *base_path,
|
|
struct object_id *oid)
|
|
{
|
|
struct tree_desc desc;
|
|
struct name_entry entry;
|
|
struct strbuf path = STRBUF_INIT;
|
|
size_t base_len;
|
|
struct tree *tree = lookup_tree(ctx->repo, oid);
|
|
|
|
if (!tree) {
|
|
error(_("failed to walk children of tree %s: not found"),
|
|
oid_to_hex(oid));
|
|
return -1;
|
|
} else if (parse_tree_gently(tree, 1)) {
|
|
die("bad tree object %s", oid_to_hex(oid));
|
|
}
|
|
|
|
strbuf_addstr(&path, base_path);
|
|
base_len = path.len;
|
|
|
|
parse_tree(tree);
|
|
init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
|
|
while (tree_entry(&desc, &entry)) {
|
|
struct type_and_oid_list *list;
|
|
struct object *o;
|
|
/* Not actually true, but we will ignore submodules later. */
|
|
enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB;
|
|
|
|
/* Skip submodules. */
|
|
if (S_ISGITLINK(entry.mode))
|
|
continue;
|
|
|
|
/* If the caller doesn't want blobs, then don't bother. */
|
|
if (!ctx->info->blobs && type == OBJ_BLOB)
|
|
continue;
|
|
|
|
if (type == OBJ_TREE) {
|
|
struct tree *child = lookup_tree(ctx->repo, &entry.oid);
|
|
o = child ? &child->object : NULL;
|
|
} else if (type == OBJ_BLOB) {
|
|
struct blob *child = lookup_blob(ctx->repo, &entry.oid);
|
|
o = child ? &child->object : NULL;
|
|
} else {
|
|
/* Wrong type? */
|
|
continue;
|
|
}
|
|
|
|
if (!o) /* report error?*/
|
|
continue;
|
|
|
|
/* Skip this object if already seen. */
|
|
if (o->flags & SEEN)
|
|
continue;
|
|
o->flags |= SEEN;
|
|
|
|
strbuf_setlen(&path, base_len);
|
|
strbuf_add(&path, entry.path, entry.pathlen);
|
|
|
|
/*
|
|
* Trees will end with "/" for concatenation and distinction
|
|
* from blobs at the same path.
|
|
*/
|
|
if (type == OBJ_TREE)
|
|
strbuf_addch(&path, '/');
|
|
|
|
if (ctx->info->pl) {
|
|
int dtype;
|
|
enum pattern_match_result match;
|
|
match = path_matches_pattern_list(path.buf, path.len,
|
|
path.buf + base_len, &dtype,
|
|
ctx->info->pl,
|
|
ctx->repo->index);
|
|
|
|
if (ctx->info->pl->use_cone_patterns &&
|
|
match == NOT_MATCHED)
|
|
continue;
|
|
else if (!ctx->info->pl->use_cone_patterns &&
|
|
type == OBJ_BLOB &&
|
|
match != MATCHED)
|
|
continue;
|
|
}
|
|
|
|
if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) {
|
|
CALLOC_ARRAY(list, 1);
|
|
list->type = type;
|
|
strmap_put(&ctx->paths_to_lists, path.buf, list);
|
|
string_list_append(&ctx->path_stack, path.buf);
|
|
}
|
|
if (!(o->flags & UNINTERESTING))
|
|
list->maybe_interesting = 1;
|
|
oid_array_append(&list->oids, &entry.oid);
|
|
}
|
|
|
|
free_tree_buffer(tree);
|
|
strbuf_release(&path);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* For each path in paths_to_explore, walk the trees another level
|
|
* and add any found blobs to the batch (but only if they exist and
|
|
* haven't been added yet).
|
|
*/
|
|
static int walk_path(struct path_walk_context *ctx,
|
|
const char *path)
|
|
{
|
|
struct type_and_oid_list *list;
|
|
int ret = 0;
|
|
|
|
list = strmap_get(&ctx->paths_to_lists, path);
|
|
|
|
if (ctx->info->prune_all_uninteresting) {
|
|
/*
|
|
* This is true if all objects were UNINTERESTING
|
|
* when added to the list.
|
|
*/
|
|
if (!list->maybe_interesting)
|
|
return 0;
|
|
|
|
/*
|
|
* But it's still possible that the objects were set
|
|
* as UNINTERESTING after being added. Do a quick check.
|
|
*/
|
|
list->maybe_interesting = 0;
|
|
for (size_t i = 0;
|
|
!list->maybe_interesting && i < list->oids.nr;
|
|
i++) {
|
|
if (list->type == OBJ_TREE) {
|
|
struct tree *t = lookup_tree(ctx->repo,
|
|
&list->oids.oid[i]);
|
|
if (t && !(t->object.flags & UNINTERESTING))
|
|
list->maybe_interesting = 1;
|
|
} else {
|
|
struct blob *b = lookup_blob(ctx->repo,
|
|
&list->oids.oid[i]);
|
|
if (b && !(b->object.flags & UNINTERESTING))
|
|
list->maybe_interesting = 1;
|
|
}
|
|
}
|
|
|
|
/* We have confirmed that all objects are UNINTERESTING. */
|
|
if (!list->maybe_interesting)
|
|
return 0;
|
|
}
|
|
|
|
/* Evaluate function pointer on this data, if requested. */
|
|
if ((list->type == OBJ_TREE && ctx->info->trees) ||
|
|
(list->type == OBJ_BLOB && ctx->info->blobs))
|
|
ret = ctx->info->path_fn(path, &list->oids, list->type,
|
|
ctx->info->path_fn_data);
|
|
|
|
/* Expand data for children. */
|
|
if (list->type == OBJ_TREE) {
|
|
for (size_t i = 0; i < list->oids.nr; i++) {
|
|
ret |= add_children(ctx,
|
|
path,
|
|
&list->oids.oid[i]);
|
|
}
|
|
}
|
|
|
|
oid_array_clear(&list->oids);
|
|
strmap_remove(&ctx->paths_to_lists, path, 1);
|
|
return ret;
|
|
}
|
|
|
|
static void clear_strmap(struct strmap *map)
|
|
{
|
|
struct hashmap_iter iter;
|
|
struct strmap_entry *e;
|
|
|
|
hashmap_for_each_entry(&map->map, &iter, e, ent) {
|
|
struct type_and_oid_list *list = e->value;
|
|
oid_array_clear(&list->oids);
|
|
}
|
|
strmap_clear(map, 1);
|
|
strmap_init(map);
|
|
}
|
|
|
|
/**
|
|
* Given the configuration of 'info', walk the commits based on 'info->revs' and
|
|
* call 'info->path_fn' on each discovered path.
|
|
*
|
|
* Returns nonzero on an error.
|
|
*/
|
|
int walk_objects_by_path(struct path_walk_info *info)
|
|
{
|
|
const char *root_path = "";
|
|
int ret = 0, has_uninteresting = 0;
|
|
size_t commits_nr = 0, paths_nr = 0;
|
|
struct commit *c;
|
|
struct type_and_oid_list *root_tree_list;
|
|
struct type_and_oid_list *commit_list;
|
|
struct path_walk_context ctx = {
|
|
.repo = info->revs->repo,
|
|
.revs = info->revs,
|
|
.info = info,
|
|
.path_stack = STRING_LIST_INIT_DUP,
|
|
.paths_to_lists = STRMAP_INIT
|
|
};
|
|
struct oidset root_tree_set = OIDSET_INIT;
|
|
|
|
trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
|
|
|
|
CALLOC_ARRAY(commit_list, 1);
|
|
commit_list->type = OBJ_COMMIT;
|
|
|
|
if (info->tags)
|
|
info->revs->tag_objects = 1;
|
|
|
|
/* Insert a single list for the root tree into the paths. */
|
|
CALLOC_ARRAY(root_tree_list, 1);
|
|
root_tree_list->type = OBJ_TREE;
|
|
root_tree_list->maybe_interesting = 1;
|
|
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
|
|
|
|
/*
|
|
* Set these values before preparing the walk to catch
|
|
* lightweight tags pointing to non-commits.
|
|
*/
|
|
info->revs->blob_objects = info->blobs;
|
|
info->revs->tree_objects = info->trees;
|
|
|
|
if (prepare_revision_walk(info->revs))
|
|
die(_("failed to setup revision walk"));
|
|
|
|
info->revs->blob_objects = info->revs->tree_objects = 0;
|
|
|
|
if (info->tags) {
|
|
struct oid_array tagged_blob_list = OID_ARRAY_INIT;
|
|
struct oid_array tags = OID_ARRAY_INIT;
|
|
|
|
trace2_region_enter("path-walk", "tag-walk", info->revs->repo);
|
|
|
|
/*
|
|
* Walk any pending objects at this point, but they should only
|
|
* be tags.
|
|
*/
|
|
for (size_t i = 0; i < info->revs->pending.nr; i++) {
|
|
struct object_array_entry *pending = info->revs->pending.objects + i;
|
|
struct object *obj = pending->item;
|
|
|
|
if (obj->type == OBJ_COMMIT || obj->flags & SEEN)
|
|
continue;
|
|
|
|
while (obj->type == OBJ_TAG) {
|
|
struct tag *tag = lookup_tag(info->revs->repo,
|
|
&obj->oid);
|
|
if (!(obj->flags & SEEN)) {
|
|
obj->flags |= SEEN;
|
|
oid_array_append(&tags, &obj->oid);
|
|
}
|
|
obj = tag->tagged;
|
|
}
|
|
|
|
if ((obj->flags & SEEN))
|
|
continue;
|
|
obj->flags |= SEEN;
|
|
|
|
switch (obj->type) {
|
|
case OBJ_TREE:
|
|
if (info->trees)
|
|
oid_array_append(&root_tree_list->oids, &obj->oid);
|
|
break;
|
|
|
|
case OBJ_BLOB:
|
|
if (info->blobs)
|
|
oid_array_append(&tagged_blob_list, &obj->oid);
|
|
break;
|
|
|
|
case OBJ_COMMIT:
|
|
/* Make sure it is in the object walk */
|
|
add_pending_object(info->revs, obj, "");
|
|
break;
|
|
|
|
default:
|
|
BUG("should not see any other type here");
|
|
}
|
|
}
|
|
|
|
info->path_fn("", &tags, OBJ_TAG, info->path_fn_data);
|
|
|
|
if (tagged_blob_list.nr && info->blobs)
|
|
info->path_fn("/tagged-blobs", &tagged_blob_list, OBJ_BLOB,
|
|
info->path_fn_data);
|
|
|
|
trace2_data_intmax("path-walk", ctx.repo, "tags", tags.nr);
|
|
trace2_region_leave("path-walk", "tag-walk", info->revs->repo);
|
|
oid_array_clear(&tags);
|
|
oid_array_clear(&tagged_blob_list);
|
|
}
|
|
|
|
while ((c = get_revision(info->revs))) {
|
|
struct object_id *oid;
|
|
struct tree *t;
|
|
commits_nr++;
|
|
|
|
if (info->commits)
|
|
oid_array_append(&commit_list->oids,
|
|
&c->object.oid);
|
|
|
|
/* If we only care about commits, then skip trees. */
|
|
if (!info->trees && !info->blobs)
|
|
continue;
|
|
|
|
oid = get_commit_tree_oid(c);
|
|
t = lookup_tree(info->revs->repo, oid);
|
|
|
|
if (t) {
|
|
if (t->object.flags & SEEN)
|
|
continue;
|
|
t->object.flags |= SEEN;
|
|
|
|
if (!oidset_insert(&root_tree_set, oid))
|
|
oid_array_append(&root_tree_list->oids, oid);
|
|
} else {
|
|
warning("could not find tree %s", oid_to_hex(oid));
|
|
}
|
|
|
|
if (t && (c->object.flags & UNINTERESTING)) {
|
|
t->object.flags |= UNINTERESTING;
|
|
has_uninteresting = 1;
|
|
}
|
|
}
|
|
|
|
trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
|
|
trace2_region_leave("path-walk", "commit-walk", info->revs->repo);
|
|
|
|
/* Track all commits. */
|
|
if (info->commits)
|
|
ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT,
|
|
info->path_fn_data);
|
|
oid_array_clear(&commit_list->oids);
|
|
free(commit_list);
|
|
|
|
/*
|
|
* Before performing a DFS of our paths and emitting them as interesting,
|
|
* do a full walk of the trees to distribute the UNINTERESTING bit. Use
|
|
* the sparse algorithm if prune_all_uninteresting was set.
|
|
*/
|
|
if (has_uninteresting) {
|
|
trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
|
|
if (info->prune_all_uninteresting)
|
|
mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
|
|
else
|
|
mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
|
|
trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
|
|
}
|
|
oidset_clear(&root_tree_set);
|
|
|
|
string_list_append(&ctx.path_stack, root_path);
|
|
|
|
trace2_region_enter("path-walk", "path-walk", info->revs->repo);
|
|
while (!ret && ctx.path_stack.nr) {
|
|
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
|
|
ctx.path_stack.nr--;
|
|
paths_nr++;
|
|
|
|
ret = walk_path(&ctx, path);
|
|
|
|
free(path);
|
|
}
|
|
trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr);
|
|
trace2_region_leave("path-walk", "path-walk", info->revs->repo);
|
|
|
|
clear_strmap(&ctx.paths_to_lists);
|
|
string_list_clear(&ctx.path_stack, 0);
|
|
return ret;
|
|
}
|