path-walk: add prune_all_uninteresting option

This option causes the path-walk API to act like the sparse tree-walk
algorithm implemented by mark_trees_uninteresting_sparse() in
list-objects.c.

Starting from the commits marked as UNINTERESTING, their root trees and
all objects reachable from those trees are UNINTERSTING, at least as we
walk path-by-path. When we reach a path where all objects associated
with that path are marked UNINTERESTING, then do no continue walking the
children of that path.

We need to be careful to pass the UNINTERESTING flag in a deep way on
the UNINTERESTING objects before we start the path-walk, or else the
depth-first search for the path-walk API may accidentally report some
objects as interesting.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
This commit is contained in:
Derrick Stolee 2024-09-03 21:55:47 -04:00 committed by Johannes Schindelin
parent 6b02e8c3a2
commit 2bd12ae253
5 changed files with 120 additions and 13 deletions

View File

@ -57,6 +57,14 @@ commits are emitted.
While it is possible to walk only commits in this way, consumers would be
better off using the revision walk API instead.
`prune_all_uninteresting`::
By default, all reachable paths are emitted by the path-walk API.
This option allows consumers to declare that they are not
interested in paths where all included objects are marked with the
`UNINTERESTING` flag. This requires using the `boundary` option in
the revision walk so that the walk emits commits marked with the
`UNINTERESTING` flag.
Examples
--------

View File

@ -22,6 +22,7 @@ struct type_and_oid_list
{
enum object_type type;
struct oid_array oids;
int maybe_interesting;
};
#define TYPE_AND_OID_LIST_INIT { \
@ -124,6 +125,8 @@ static int add_children(struct path_walk_context *ctx,
strmap_put(&ctx->paths_to_lists, path.buf, list);
string_list_append(&ctx->path_stack, path.buf);
}
if (!(o->flags & UNINTERESTING))
list->maybe_interesting = 1;
oid_array_append(&list->oids, &entry.oid);
}
@ -145,6 +148,40 @@ static int walk_path(struct path_walk_context *ctx,
list = strmap_get(&ctx->paths_to_lists, path);
if (ctx->info->prune_all_uninteresting) {
/*
* This is true if all objects were UNINTERESTING
* when added to the list.
*/
if (!list->maybe_interesting)
return 0;
/*
* But it's still possible that the objects were set
* as UNINTERESTING after being added. Do a quick check.
*/
list->maybe_interesting = 0;
for (size_t i = 0;
!list->maybe_interesting && i < list->oids.nr;
i++) {
if (list->type == OBJ_TREE) {
struct tree *t = lookup_tree(ctx->repo,
&list->oids.oid[i]);
if (t && !(t->object.flags & UNINTERESTING))
list->maybe_interesting = 1;
} else {
struct blob *b = lookup_blob(ctx->repo,
&list->oids.oid[i]);
if (b && !(b->object.flags & UNINTERESTING))
list->maybe_interesting = 1;
}
}
/* We have confirmed that all objects are UNINTERESTING. */
if (!list->maybe_interesting)
return 0;
}
/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs))
@ -187,7 +224,7 @@ static void clear_strmap(struct strmap *map)
int walk_objects_by_path(struct path_walk_info *info)
{
const char *root_path = "";
int ret = 0;
int ret = 0, has_uninteresting = 0;
size_t commits_nr = 0, paths_nr = 0;
struct commit *c;
struct type_and_oid_list *root_tree_list;
@ -199,6 +236,7 @@ int walk_objects_by_path(struct path_walk_info *info)
.path_stack = STRING_LIST_INIT_DUP,
.paths_to_lists = STRMAP_INIT
};
struct oidset root_tree_set = OIDSET_INIT;
trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
@ -211,6 +249,7 @@ int walk_objects_by_path(struct path_walk_info *info)
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;
root_tree_list->maybe_interesting = 1;
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
/*
@ -301,10 +340,17 @@ int walk_objects_by_path(struct path_walk_info *info)
oid = get_commit_tree_oid(c);
t = lookup_tree(info->revs->repo, oid);
if (t)
if (t) {
oidset_insert(&root_tree_set, oid);
oid_array_append(&root_tree_list->oids, oid);
else
} else {
warning("could not find tree %s", oid_to_hex(oid));
}
if (t && (c->object.flags & UNINTERESTING)) {
t->object.flags |= UNINTERESTING;
has_uninteresting = 1;
}
}
trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
@ -317,6 +363,21 @@ int walk_objects_by_path(struct path_walk_info *info)
oid_array_clear(&commit_list->oids);
free(commit_list);
/*
* Before performing a DFS of our paths and emitting them as interesting,
* do a full walk of the trees to distribute the UNINTERESTING bit. Use
* the sparse algorithm if prune_all_uninteresting was set.
*/
if (has_uninteresting) {
trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
if (info->prune_all_uninteresting)
mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
else
mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
}
oidset_clear(&root_tree_set);
string_list_append(&ctx.path_stack, root_path);
trace2_region_enter("path-walk", "path-walk", info->revs->repo);

View File

@ -38,6 +38,14 @@ struct path_walk_info {
int trees;
int blobs;
int tags;
/**
* When 'prune_all_uninteresting' is set and a path has all objects
* marked as UNINTERESTING, then the path-walk will not visit those
* objects. It will not call path_fn on those objects and will not
* walk the children of such trees.
*/
int prune_all_uninteresting;
};
#define PATH_WALK_INFO_INIT { \

View File

@ -55,8 +55,12 @@ static int emit_block(const char *path, struct oid_array *oids,
BUG("we do not understand this type");
}
for (size_t i = 0; i < oids->nr; i++)
printf("%s:%s:%s\n", typestr, path, oid_to_hex(&oids->oid[i]));
for (size_t i = 0; i < oids->nr; i++) {
struct object *o = lookup_unknown_object(the_repository,
&oids->oid[i]);
printf("%s:%s:%s%s\n", typestr, path, oid_to_hex(&oids->oid[i]),
o->flags & UNINTERESTING ? ":UNINTERESTING" : "");
}
return 0;
}
@ -76,6 +80,8 @@ int cmd__path_walk(int argc, const char **argv)
N_("toggle inclusion of tag objects")),
OPT_BOOL(0, "trees", &info.trees,
N_("toggle inclusion of tree objects")),
OPT_BOOL(0, "prune", &info.prune_all_uninteresting,
N_("toggle pruning of uninteresting paths")),
OPT_END(),
};

View File

@ -229,19 +229,19 @@ test_expect_success 'topic, not base, boundary' '
cat >expect <<-EOF &&
COMMIT::$(git rev-parse topic)
COMMIT::$(git rev-parse base~1)
COMMIT::$(git rev-parse base~1):UNINTERESTING
commits:2
TREE::$(git rev-parse topic^{tree})
TREE::$(git rev-parse base~1^{tree})
TREE:left/:$(git rev-parse base~1:left)
TREE::$(git rev-parse base~1^{tree}):UNINTERESTING
TREE:left/:$(git rev-parse base~1:left):UNINTERESTING
TREE:right/:$(git rev-parse topic:right)
TREE:right/:$(git rev-parse base~1:right)
TREE:right/:$(git rev-parse base~1:right):UNINTERESTING
trees:5
BLOB:a:$(git rev-parse base~1:a)
BLOB:left/b:$(git rev-parse base~1:left/b)
BLOB:right/c:$(git rev-parse base~1:right/c)
BLOB:a:$(git rev-parse base~1:a):UNINTERESTING
BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
BLOB:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
BLOB:right/c:$(git rev-parse topic:right/c)
BLOB:right/d:$(git rev-parse base~1:right/d)
BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:5
tags:0
EOF
@ -252,4 +252,28 @@ test_expect_success 'topic, not base, boundary' '
test_cmp expect.sorted out.sorted
'
test_expect_success 'topic, not base, boundary with pruning' '
test-tool path-walk --prune -- --boundary topic --not base >out &&
cat >expect <<-EOF &&
COMMIT::$(git rev-parse topic)
COMMIT::$(git rev-parse base~1):UNINTERESTING
commits:2
TREE::$(git rev-parse topic^{tree})
TREE::$(git rev-parse base~1^{tree}):UNINTERESTING
TREE:right/:$(git rev-parse topic:right)
TREE:right/:$(git rev-parse base~1:right):UNINTERESTING
trees:4
BLOB:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
BLOB:right/c:$(git rev-parse topic:right/c)
blobs:2
tags:0
EOF
sort expect >expect.sorted &&
sort out >out.sorted &&
test_cmp expect.sorted out.sorted
'
test_done