/*
 * path-walk.c: implementation for path-based walks of the object graph.
 */
#include "git-compat-util.h"
#include "path-walk.h"
#include "blob.h"
#include "commit.h"
#include "dir.h"
#include "hashmap.h"
#include "hex.h"
#include "object.h"
#include "oid-array.h"
#include "repository.h"
#include "revision.h"
#include "string-list.h"
#include "strmap.h"
#include "tag.h"
#include "trace2.h"
#include "tree.h"
#include "tree-walk.h"

/*
 * The batch of objects collected for a single path.
 *
 * 'type' is the object type shared by every id in 'oids'.
 * 'maybe_interesting' is set to 1 as soon as one object without the
 * UNINTERESTING flag is appended; walk_path() consults it when
 * 'prune_all_uninteresting' is enabled.
 */
struct type_and_oid_list {
	enum object_type type;
	struct oid_array oids;
	int maybe_interesting;
};

#define TYPE_AND_OID_LIST_INIT { \
	.type = OBJ_NONE, \
	.oids = OID_ARRAY_INIT \
}

struct path_walk_context {
	/**
	 * Repeats of data in 'struct path_walk_info' for
	 * access with fewer characters.
	 */
	struct repository *repo;
	struct rev_info *revs;
	struct path_walk_info *info;

	/**
	 * Map a path to a 'struct type_and_oid_list'
	 * containing the objects discovered at that
	 * path.
	 */
	struct strmap paths_to_lists;

	/**
	 * Store the current list of paths in a stack, to
	 * facilitate depth-first-search without recursion.
	 */
	struct string_list path_stack;
};

/*
 * Expand the tree 'oid' located at 'base_path'.
 *
 * Every entry whose object is not yet flagged SEEN is flagged SEEN and its
 * object id is appended to the list stored for the entry's full path in
 * ctx->paths_to_lists. The first time a path is encountered its list is
 * allocated and the path is pushed onto ctx->path_stack.
 *
 * Entries are skipped when they are submodules, when they are blobs and
 * ctx->info->blobs is unset, when their object cannot be looked up, or when
 * ctx->info->pl is set and rejects the path.
 *
 * Returns 0 on success and -1 if the tree cannot be looked up; dies if the
 * tree cannot be parsed.
 */
static int add_children(struct path_walk_context *ctx,
			const char *base_path,
			struct object_id *oid)
{
	struct tree_desc desc;
	struct name_entry entry;
	struct strbuf path = STRBUF_INIT;
	size_t base_len;
	struct tree *tree = lookup_tree(ctx->repo, oid);

	if (!tree) {
		error(_("failed to walk children of tree %s: not found"),
		      oid_to_hex(oid));
		return -1;
	} else if (parse_tree_gently(tree, 1)) {
		die("bad tree object %s", oid_to_hex(oid));
	}

	strbuf_addstr(&path, base_path);
	base_len = path.len;

	/*
	 * The tree was parsed successfully above, so its buffer can be
	 * used directly; there is no need to parse it a second time.
	 */
	init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
	while (tree_entry(&desc, &entry)) {
		struct type_and_oid_list *list;
		struct object *o;
		/* Not actually true, but we will ignore submodules later. */
		enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB;

		/* Skip submodules. */
		if (S_ISGITLINK(entry.mode))
			continue;

		/* If the caller doesn't want blobs, then don't bother. */
		if (!ctx->info->blobs && type == OBJ_BLOB)
			continue;

		if (type == OBJ_TREE) {
			struct tree *child = lookup_tree(ctx->repo, &entry.oid);
			o = child ? &child->object : NULL;
		} else if (type == OBJ_BLOB) {
			struct blob *child = lookup_blob(ctx->repo, &entry.oid);
			o = child ? &child->object : NULL;
		} else {
			/* Wrong type? */
			continue;
		}

		if (!o) /* report error?*/
			continue;

		/* Skip this object if already seen. */
		if (o->flags & SEEN)
			continue;
		o->flags |= SEEN;

		strbuf_setlen(&path, base_len);
		strbuf_add(&path, entry.path, entry.pathlen);

		/*
		 * Trees will end with "/" for concatenation and distinction
		 * from blobs at the same path.
		 */
		if (type == OBJ_TREE)
			strbuf_addch(&path, '/');

		if (ctx->info->pl) {
			/*
			 * dtype is handed to the matcher by address, so give
			 * it a defined value derived from the tree entry
			 * rather than passing an indeterminate int.
			 * NOTE(review): confirm DT_DIR/DT_REG is the dtype
			 * the matcher expects for paths from an object walk.
			 */
			int dtype = (type == OBJ_TREE) ? DT_DIR : DT_REG;
			enum pattern_match_result match;
			match = path_matches_pattern_list(path.buf, path.len,
							  path.buf + base_len, &dtype,
							  ctx->info->pl,
							  ctx->repo->index);

			/*
			 * Cone patterns: drop anything that did not match.
			 * Other patterns: only blobs are dropped, and only
			 * when they are not a definite match.
			 */
			if (ctx->info->pl->use_cone_patterns &&
			    match == NOT_MATCHED)
				continue;
			else if (!ctx->info->pl->use_cone_patterns &&
				 type == OBJ_BLOB &&
				 match != MATCHED)
				continue;
		}

		if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) {
			CALLOC_ARRAY(list, 1);
			list->type = type;
			strmap_put(&ctx->paths_to_lists, path.buf, list);
			string_list_append(&ctx->path_stack, path.buf);
		}
		if (!(o->flags & UNINTERESTING))
			list->maybe_interesting = 1;
		oid_array_append(&list->oids, &entry.oid);
	}

	free_tree_buffer(tree);
	strbuf_release(&path);
	return 0;
}

/*
 * Process the list of objects stored for 'path' in ctx->paths_to_lists:
 * report the list through ctx->info->path_fn when its type was requested,
 * and, for tree lists, expand each tree with add_children() so that the
 * child paths are queued. The list is removed from the map afterwards.
 *
 * When ctx->info->prune_all_uninteresting is set and every object in the
 * list is UNINTERESTING, nothing is reported or expanded, 0 is returned,
 * and the list is left in the map.
 *
 * Returns the callback result OR-ed with the results of add_children().
 */
static int walk_path(struct path_walk_context *ctx,
		     const char *path)
{
	struct type_and_oid_list *list;
	int ret = 0;

	list = strmap_get(&ctx->paths_to_lists, path);
	if (!list)
		BUG("provided path '%s' that had no associated list", path);

	if (ctx->info->prune_all_uninteresting) {
		/*
		 * This is true if all objects were UNINTERESTING
		 * when added to the list.
		 */
		if (!list->maybe_interesting)
			return 0;

		/*
		 * But it's still possible that the objects were set
		 * as UNINTERESTING after being added. Do a quick check.
		 */
		list->maybe_interesting = 0;
		for (size_t i = 0;
		     !list->maybe_interesting && i < list->oids.nr;
		     i++) {
			if (list->type == OBJ_TREE) {
				struct tree *t = lookup_tree(ctx->repo,
							     &list->oids.oid[i]);
				if (t && !(t->object.flags & UNINTERESTING))
					list->maybe_interesting = 1;
			} else {
				struct blob *b = lookup_blob(ctx->repo,
							     &list->oids.oid[i]);
				if (b && !(b->object.flags & UNINTERESTING))
					list->maybe_interesting = 1;
			}
		}

		/* We have confirmed that all objects are UNINTERESTING. */
		if (!list->maybe_interesting)
			return 0;
	}

	/* Evaluate function pointer on this data, if requested. */
	if ((list->type == OBJ_TREE && ctx->info->trees) ||
	    (list->type == OBJ_BLOB && ctx->info->blobs))
		ret = ctx->info->path_fn(path, &list->oids, list->type,
					ctx->info->path_fn_data);

	/* Expand data for children. */
	if (list->type == OBJ_TREE) {
		for (size_t i = 0; i < list->oids.nr; i++) {
			ret |= add_children(ctx,
					    path,
					    &list->oids.oid[i]);
		}
	}

	oid_array_clear(&list->oids);
	strmap_remove(&ctx->paths_to_lists, path, 1);
	return ret;
}

/*
 * Release every 'struct type_and_oid_list' still stored in 'map' (both the
 * oid arrays and the list values themselves) and leave 'map' initialized
 * and empty.
 */
static void clear_strmap(struct strmap *map)
{
	struct hashmap_iter iter;
	struct strmap_entry *e;

	hashmap_for_each_entry(&map->map, &iter, e, ent) {
		struct type_and_oid_list *list = e->value;
		oid_array_clear(&list->oids);
	}
	strmap_clear(map, 1);
	strmap_init(map);
}

/**
 * Given the configuration of 'info', walk the commits based on 'info->revs' and
 * call 'info->path_fn' on each discovered path.
 *
 * Returns nonzero on an error.
*/ int walk_objects_by_path(struct path_walk_info *info) { const char *root_path = ""; int ret = 0, has_uninteresting = 0; size_t commits_nr = 0, paths_nr = 0; struct commit *c; struct type_and_oid_list *root_tree_list; struct type_and_oid_list *commit_list; struct path_walk_context ctx = { .repo = info->revs->repo, .revs = info->revs, .info = info, .path_stack = STRING_LIST_INIT_DUP, .paths_to_lists = STRMAP_INIT }; struct oidset root_tree_set = OIDSET_INIT; trace2_region_enter("path-walk", "commit-walk", info->revs->repo); CALLOC_ARRAY(commit_list, 1); commit_list->type = OBJ_COMMIT; if (info->tags) info->revs->tag_objects = 1; /* Insert a single list for the root tree into the paths. */ CALLOC_ARRAY(root_tree_list, 1); root_tree_list->type = OBJ_TREE; root_tree_list->maybe_interesting = 1; strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); /* * Set these values before preparing the walk to catch * lightweight tags pointing to non-commits. */ info->revs->blob_objects = info->blobs; info->revs->tree_objects = info->trees; if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); info->revs->blob_objects = info->revs->tree_objects = 0; if (info->tags) { struct oid_array tagged_blob_list = OID_ARRAY_INIT; struct oid_array tags = OID_ARRAY_INIT; trace2_region_enter("path-walk", "tag-walk", info->revs->repo); /* * Walk any pending objects at this point, but they should only * be tags. 
*/ for (size_t i = 0; i < info->revs->pending.nr; i++) { struct object_array_entry *pending = info->revs->pending.objects + i; struct object *obj = pending->item; if (obj->type == OBJ_COMMIT || obj->flags & SEEN) continue; while (obj->type == OBJ_TAG) { struct tag *tag = lookup_tag(info->revs->repo, &obj->oid); if (!(obj->flags & SEEN)) { obj->flags |= SEEN; oid_array_append(&tags, &obj->oid); } obj = tag->tagged; } if ((obj->flags & SEEN)) continue; obj->flags |= SEEN; switch (obj->type) { case OBJ_TREE: if (info->trees) oid_array_append(&root_tree_list->oids, &obj->oid); break; case OBJ_BLOB: if (info->blobs) oid_array_append(&tagged_blob_list, &obj->oid); break; case OBJ_COMMIT: /* Make sure it is in the object walk */ add_pending_object(info->revs, obj, ""); break; default: BUG("should not see any other type here"); } } info->path_fn("", &tags, OBJ_TAG, info->path_fn_data); if (tagged_blob_list.nr && info->blobs) info->path_fn("/tagged-blobs", &tagged_blob_list, OBJ_BLOB, info->path_fn_data); trace2_data_intmax("path-walk", ctx.repo, "tags", tags.nr); trace2_region_leave("path-walk", "tag-walk", info->revs->repo); oid_array_clear(&tags); oid_array_clear(&tagged_blob_list); } while ((c = get_revision(info->revs))) { struct object_id *oid; struct tree *t; commits_nr++; if (info->commits) oid_array_append(&commit_list->oids, &c->object.oid); /* If we only care about commits, then skip trees. 
*/ if (!info->trees && !info->blobs) continue; oid = get_commit_tree_oid(c); t = lookup_tree(info->revs->repo, oid); if (t) { if (t->object.flags & SEEN) continue; t->object.flags |= SEEN; if (!oidset_insert(&root_tree_set, oid)) oid_array_append(&root_tree_list->oids, oid); } else { warning("could not find tree %s", oid_to_hex(oid)); } if (t && (c->object.flags & UNINTERESTING)) { t->object.flags |= UNINTERESTING; has_uninteresting = 1; } } trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); trace2_region_leave("path-walk", "commit-walk", info->revs->repo); /* Track all commits. */ if (info->commits) ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT, info->path_fn_data); oid_array_clear(&commit_list->oids); free(commit_list); /* * Before performing a DFS of our paths and emitting them as interesting, * do a full walk of the trees to distribute the UNINTERESTING bit. Use * the sparse algorithm if prune_all_uninteresting was set. */ if (has_uninteresting) { trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo); if (info->prune_all_uninteresting) mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set); else mark_trees_uninteresting_dense(ctx.repo, &root_tree_set); trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo); } oidset_clear(&root_tree_set); string_list_append(&ctx.path_stack, root_path); trace2_region_enter("path-walk", "path-walk", info->revs->repo); while (!ret && ctx.path_stack.nr) { char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; ctx.path_stack.nr--; paths_nr++; ret = walk_path(&ctx, path); free(path); } trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); trace2_region_leave("path-walk", "path-walk", info->revs->repo); clear_strmap(&ctx.paths_to_lists); string_list_clear(&ctx.path_stack, 0); return ret; }