From 84a1d0039a7d9974d4900d730ba7edeef31b8e43 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:36 +0200 Subject: [PATCH 01/11] builtin/cat-file: rename variable that tracks usage The usage strings for git-cat-file(1) that we pass to `parse_options()` and `usage_msg_optf()` are stored in a variable called `usage`. This variable shadows the declaration of `usage()`, which we'll want to use in a subsequent commit. Rename the variable to `builtin_catfile_usage`, which is in line with how the variable is typically called in other builtins. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 47 ++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b13561cf73..b158b3acef 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -941,7 +941,7 @@ int cmd_cat_file(int argc, int input_nul_terminated = 0; int nul_terminated = 0; - const char * const usage[] = { + const char * const builtin_catfile_usage[] = { N_("git cat-file "), N_("git cat-file (-e | -p) "), N_("git cat-file (-t | -s) [--allow-unknown-type] "), @@ -1007,7 +1007,7 @@ int cmd_cat_file(int argc, batch.buffer_output = -1; - argc = parse_options(argc, argv, prefix, options, usage, 0); + argc = parse_options(argc, argv, prefix, options, builtin_catfile_usage, 0); opt_cw = (opt == 'c' || opt == 'w'); opt_epts = (opt == 'e' || opt == 'p' || opt == 't' || opt == 's'); @@ -1021,7 +1021,7 @@ int cmd_cat_file(int argc, /* Option compatibility */ if (force_path && !opt_cw) usage_msg_optf(_("'%s=<%s>' needs '%s' or '%s'"), - usage, options, + builtin_catfile_usage, options, "--path", _("path|tree-ish"), "--filters", "--textconv"); @@ -1029,20 +1029,20 @@ int cmd_cat_file(int argc, if (batch.enabled) ; else if (batch.follow_symlinks) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--follow-symlinks"); + usage_msg_optf(_("'%s' requires 
a batch mode"), builtin_catfile_usage, + options, "--follow-symlinks"); else if (batch.buffer_output >= 0) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--buffer"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "--buffer"); else if (batch.all_objects) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "--batch-all-objects"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "--batch-all-objects"); else if (input_nul_terminated) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "-z"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "-z"); else if (nul_terminated) - usage_msg_optf(_("'%s' requires a batch mode"), usage, options, - "-Z"); + usage_msg_optf(_("'%s' requires a batch mode"), builtin_catfile_usage, + options, "-Z"); batch.input_delim = batch.output_delim = '\n'; if (input_nul_terminated) @@ -1063,10 +1063,10 @@ int cmd_cat_file(int argc, batch.transform_mode = opt; else if (opt && opt != 'b') usage_msg_optf(_("'-%c' is incompatible with batch mode"), - usage, options, opt); + builtin_catfile_usage, options, opt); else if (argc) - usage_msg_opt(_("batch modes take no arguments"), usage, - options); + usage_msg_opt(_("batch modes take no arguments"), + builtin_catfile_usage, options); return batch_objects(&batch); } @@ -1074,22 +1074,25 @@ int cmd_cat_file(int argc, if (opt) { if (!argc && opt == 'c') usage_msg_optf(_(" required with '%s'"), - usage, options, "--textconv"); + builtin_catfile_usage, options, + "--textconv"); else if (!argc && opt == 'w') usage_msg_optf(_(" required with '%s'"), - usage, options, "--filters"); + builtin_catfile_usage, options, + "--filters"); else if (!argc && opt_epts) usage_msg_optf(_(" required with '-%c'"), - usage, options, opt); + builtin_catfile_usage, options, opt); else if (argc == 1) obj_name = argv[0]; else - usage_msg_opt(_("too many arguments"), 
usage, options); + usage_msg_opt(_("too many arguments"), builtin_catfile_usage, + options); } else if (!argc) { - usage_with_options(usage, options); + usage_with_options(builtin_catfile_usage, options); } else if (argc != 2) { usage_msg_optf(_("only two arguments allowed in mode, not %d"), - usage, options, argc); + builtin_catfile_usage, options, argc); } else if (argc) { exp_type = argv[0]; obj_name = argv[1]; From 1914ae0d706f7811eb9f293ae14ca9eb4f25fcca Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:37 +0200 Subject: [PATCH 02/11] builtin/cat-file: introduce function to report object status We have multiple callsites that report the status of an object, for example when the object is missing or its name is ambiguous. We're about to add a couple more such callsites to report on "excluded" objects. Prepare for this by introducing a new function `report_object_status()` that encapsulates the functionality. Note that this function also flushes stdout, which is a requirement so that request-response style batched modes can learn about the status before proceeding to the next object. We already flush correctly at all existing callsites, even though the flush in `batch_one_object()` only comes after the switch statement. That flush is now redundant, and we could in theory deduplicate it by moving it into all branches that don't use `report_object_status()`. But that doesn't quite feel sensible: - The duplicate flush should ultimately just be a no-op for us and thus shouldn't impact performance significantly. - By keeping the flush in `report_object_status()` we ensure that all future callers get semantics correct. So let's just be pragmatic and live with the duplicated flush. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b158b3acef..1261a3ce35 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -455,6 +455,16 @@ static void print_default_format(struct strbuf *scratch, struct expand_data *dat (uintmax_t)data->size, opt->output_delim); } +static void report_object_status(struct batch_options *opt, + const char *obj_name, + const struct object_id *oid, + const char *status) +{ + printf("%s %s%c", obj_name ? obj_name : oid_to_hex(oid), + status, opt->output_delim); + fflush(stdout); +} + /* * If "pack" is non-NULL, then "offset" is the byte offset within the pack from * which the object may be accessed (though note that we may also rely on @@ -481,9 +491,7 @@ static void batch_object_write(const char *obj_name, &data->oid, &data->info, OBJECT_INFO_LOOKUP_REPLACE); if (ret < 0) { - printf("%s missing%c", - obj_name ? obj_name : oid_to_hex(&data->oid), opt->output_delim); - fflush(stdout); + report_object_status(opt, obj_name, &data->oid, "missing"); return; } @@ -535,10 +543,10 @@ static void batch_one_object(const char *obj_name, if (result != FOUND) { switch (result) { case MISSING_OBJECT: - printf("%s missing%c", obj_name, opt->output_delim); + report_object_status(opt, obj_name, &data->oid, "missing"); break; case SHORT_NAME_AMBIGUOUS: - printf("%s ambiguous%c", obj_name, opt->output_delim); + report_object_status(opt, obj_name, &data->oid, "ambiguous"); break; case DANGLING_SYMLINK: printf("dangling %"PRIuMAX"%c%s%c", From eb83e4c64b5a3458569593c2ab0c29365f10a82f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:38 +0200 Subject: [PATCH 03/11] builtin/cat-file: wire up an option to filter objects In batch mode, git-cat-file(1) enumerates all objects and prints them by iterating through both loose and packed objects. 
This works without considering their reachability at all, and consequently most options to filter objects as they exist in e.g. git-rev-list(1) are not applicable. In some situations it may still be useful though to filter objects based on properties that are inherent to them. This includes the object size as well as its type. Such a filter already exists in git-rev-list(1) with the `--filter=` command line option. While this option supports a couple of filters that are not applicable to our usecase, some of them are quite a neat fit. Wire up the filter as an option for git-cat-file(1). This allows us to reuse the same syntax as in git-rev-list(1) so that we don't have to reinvent the wheel. For now, we die when any of the filter options has been passed by the user, but they will be wired up in subsequent commits. Further note that the filters that we are about to introduce don't significantly speed up the runtime of git-cat-file(1). While we can skip emitting a lot of objects in case they are uninteresting to us, the majority of time is spent reading the packfile, which is bottlenecked by I/O and not the processor. This will change though once we start to make use of bitmaps, which will allow us to skip reading the whole packfile. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-cat-file.adoc | 16 +++++++++++++ builtin/cat-file.c | 36 +++++++++++++++++++++++++---- t/t1006-cat-file.sh | 40 +++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 4 deletions(-) diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index d5890ae368..da92eed117 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -81,6 +81,15 @@ OPTIONS end-of-line conversion, etc). In this case, `` has to be of the form `:`, or `:`. +--filter=:: +--no-filter:: + Omit objects from the list of printed objects. This can only be used in + combination with one of the batched modes. 
Excluded objects that have + been explicitly requested via any of the batch modes that read objects + via standard input (`--batch`, `--batch-check`) will be reported as + "filtered". Excluded objects in `--batch-all-objects` mode will not be + printed at all. No filters are supported yet. + --path=:: For use with `--textconv` or `--filters`, to allow specifying an object name and a path separately, e.g. when it is difficult to figure out @@ -340,6 +349,13 @@ the repository, then `cat-file` will ignore any custom format and print: SP missing LF ------------ +If a name is specified on stdin that is filtered out via `--filter=`, +then `cat-file` will ignore any custom format and print: + +------------ + SP excluded LF +------------ + If a name is specified that might refer to more than one object (an ambiguous short sha), then `cat-file` will ignore any custom format and print: ------------ diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 1261a3ce35..0e2176c449 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -15,6 +15,7 @@ #include "gettext.h" #include "hex.h" #include "ident.h" +#include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" #include "streaming.h" @@ -35,6 +36,7 @@ enum batch_mode { }; struct batch_options { + struct list_objects_filter_options objects_filter; int enabled; int follow_symlinks; enum batch_mode batch_mode; @@ -495,6 +497,13 @@ static void batch_object_write(const char *obj_name, return; } + switch (opt->objects_filter.choice) { + case LOFC_DISABLED: + break; + default: + BUG("unsupported objects filter"); + } + if (use_mailmap && (data->type == OBJ_COMMIT || data->type == OBJ_TAG)) { size_t s = data->size; char *buf = NULL; @@ -820,7 +829,8 @@ static int batch_objects(struct batch_options *opt) struct object_cb_data cb; struct object_info empty = OBJECT_INFO_INIT; - if (!memcmp(&data.info, &empty, sizeof(empty))) + if (!memcmp(&data.info, &empty, sizeof(empty)) && + 
opt->objects_filter.choice == LOFC_DISABLED) data.skip_object_info = 1; if (repo_has_promisor_remote(the_repository)) @@ -944,10 +954,13 @@ int cmd_cat_file(int argc, int opt_cw = 0; int opt_epts = 0; const char *exp_type = NULL, *obj_name = NULL; - struct batch_options batch = {0}; + struct batch_options batch = { + .objects_filter = LIST_OBJECTS_FILTER_INIT, + }; int unknown_type = 0; int input_nul_terminated = 0; int nul_terminated = 0; + int ret; const char * const builtin_catfile_usage[] = { N_("git cat-file "), @@ -1008,6 +1021,7 @@ int cmd_cat_file(int argc, N_("run filters on object's content"), 'w'), OPT_STRING(0, "path", &force_path, N_("blob|tree"), N_("use a for (--textconv | --filters); Not with 'batch'")), + OPT_PARSE_LIST_OBJECTS_FILTER(&batch.objects_filter), OPT_END() }; @@ -1022,6 +1036,14 @@ int cmd_cat_file(int argc, if (use_mailmap) read_mailmap(&mailmap); + switch (batch.objects_filter.choice) { + case LOFC_DISABLED: + break; + default: + usagef(_("objects filter not supported: '%s'"), + list_object_filter_config_name(batch.objects_filter.choice)); + } + /* --batch-all-objects? 
*/ if (opt == 'b') batch.all_objects = 1; @@ -1076,7 +1098,8 @@ int cmd_cat_file(int argc, usage_msg_opt(_("batch modes take no arguments"), builtin_catfile_usage, options); - return batch_objects(&batch); + ret = batch_objects(&batch); + goto out; } if (opt) { @@ -1108,5 +1131,10 @@ int cmd_cat_file(int argc, if (unknown_type && opt != 't' && opt != 's') die("git cat-file --allow-unknown-type: use with -s or -t"); - return cat_one_file(opt, exp_type, obj_name, unknown_type); + + ret = cat_one_file(opt, exp_type, obj_name, unknown_type); + +out: + list_objects_filter_release(&batch.objects_filter); + return ret; } diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 398865d6eb..9ce4eda6e6 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -1353,4 +1353,44 @@ test_expect_success PERL '--batch-command info is unbuffered by default' ' perl -e "$script" -- --batch-command $hello_oid "$expect" "info " ' +test_expect_success 'setup for objects filter' ' + git init repo +' + +test_expect_success 'objects filter with unknown option' ' + cat >expect <<-EOF && + fatal: invalid filter-spec ${SQ}unknown${SQ} + EOF + test_must_fail git -C repo cat-file --filter=unknown 2>err && + test_cmp expect err +' + +for option in blob:none blob:limit=1 object:type=tag sparse:oid=1234 tree:1 sparse:path=x +do + test_expect_success "objects filter with unsupported option $option" ' + case "$option" in + tree:1) + echo "usage: objects filter not supported: ${SQ}tree${SQ}" >expect + ;; + sparse:path=x) + echo "fatal: sparse:path filters support has been dropped" >expect + ;; + *) + option_name=$(echo "$option" | cut -d= -f1) && + printf "usage: objects filter not supported: ${SQ}%s${SQ}\n" "$option_name" >expect + ;; + esac && + test_must_fail git -C repo cat-file --filter=$option 2>err && + test_cmp expect err + ' +done + +test_expect_success 'objects filter: disabled' ' + git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --no-filter >actual && + sort 
actual >actual.sorted && + git -C repo rev-list --objects --no-object-names --all >expect && + sort expect >expect.sorted && + test_cmp expect.sorted actual.sorted +' + test_done From 3794e9bf982cde754a48b569a639bd2e180e754c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:39 +0200 Subject: [PATCH 04/11] builtin/cat-file: support "blob:none" objects filter Implement support for the "blob:none" filter in git-cat-file(1), which causes us to omit all blobs. Note that this new filter requires us to read the object type via `oid_object_info_extended()` in `batch_object_write()`. But as we try to optimize away reading objects from the database the `data->info.typep` pointer may not be set. We thus have to adapt the logic to conditionally set the pointer in cases where the filter is given. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-cat-file.adoc | 4 ++- builtin/cat-file.c | 15 ++++++++++- t/t1006-cat-file.sh | 47 +++++++++++++++++++++++++++++++-- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index da92eed117..afcdb0a473 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -88,7 +88,9 @@ OPTIONS been explicitly requested via any of the batch modes that read objects via standard input (`--batch`, `--batch-check`) will be reported as "filtered". Excluded objects in `--batch-all-objects` mode will not be - printed at all. No filters are supported yet. + printed at all. The '' may be one of the following: ++ +The form '--filter=blob:none' omits all blobs. 
--path=:: For use with `--textconv` or `--filters`, to allow specifying an object diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 0e2176c449..bcceb646f8 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -482,7 +482,8 @@ static void batch_object_write(const char *obj_name, if (!data->skip_object_info) { int ret; - if (use_mailmap) + if (use_mailmap || + opt->objects_filter.choice == LOFC_BLOB_NONE) data->info.typep = &data->type; if (pack) @@ -500,6 +501,14 @@ static void batch_object_write(const char *obj_name, switch (opt->objects_filter.choice) { case LOFC_DISABLED: break; + case LOFC_BLOB_NONE: + if (data->type == OBJ_BLOB) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; default: BUG("unsupported objects filter"); } @@ -1039,6 +1048,10 @@ int cmd_cat_file(int argc, switch (batch.objects_filter.choice) { case LOFC_DISABLED: break; + case LOFC_BLOB_NONE: + if (!batch.enabled) + usage(_("objects filter only supported in batch mode")); + break; default: usagef(_("objects filter not supported: '%s'"), list_object_filter_config_name(batch.objects_filter.choice)); diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 9ce4eda6e6..7404c135b1 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -1354,7 +1354,22 @@ test_expect_success PERL '--batch-command info is unbuffered by default' ' ' test_expect_success 'setup for objects filter' ' - git init repo + git init repo && + ( + # Seed the repository with three different sets of objects: + # + # - The first set is fully packed and has a bitmap. + # - The second set is packed, but has no bitmap. + # - The third set is loose. + # + # This ensures that we cover all these types as expected. 
+ cd repo && + test_commit first && + git repack -Adb && + test_commit second && + git repack -d && + test_commit third + ) ' test_expect_success 'objects filter with unknown option' ' @@ -1365,7 +1380,7 @@ test_expect_success 'objects filter with unknown option' ' test_cmp expect err ' -for option in blob:none blob:limit=1 object:type=tag sparse:oid=1234 tree:1 sparse:path=x +for option in blob:limit=1 object:type=tag sparse:oid=1234 tree:1 sparse:path=x do test_expect_success "objects filter with unsupported option $option" ' case "$option" in @@ -1393,4 +1408,32 @@ test_expect_success 'objects filter: disabled' ' test_cmp expect.sorted actual.sorted ' +test_objects_filter () { + filter="$1" + + test_expect_success "objects filter: $filter" ' + git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --filter="$filter" >actual && + sort actual >actual.sorted && + git -C repo rev-list --objects --no-object-names --all --filter="$filter" --filter-provided-objects >expect && + sort expect >expect.sorted && + test_cmp expect.sorted actual.sorted + ' + + test_expect_success "objects filter prints excluded objects: $filter" ' + # Find all objects that would be excluded by the current filter. 
+ git -C repo rev-list --objects --no-object-names --all >all && + git -C repo rev-list --objects --no-object-names --all --filter="$filter" --filter-provided-objects >filtered && + sort all >all.sorted && + sort filtered >filtered.sorted && + comm -23 all.sorted filtered.sorted >expected.excluded && + test_line_count -gt 0 expected.excluded && + + git -C repo cat-file --batch-check="%(objectname)" --filter="$filter" actual && + awk "/excluded/{ print \$1 }" actual | sort >actual.excluded && + test_cmp expected.excluded actual.excluded + ' +} + +test_objects_filter "blob:none" + test_done From dbe1b32d59699092d549150e2db7af07e3cbfaf3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:40 +0200 Subject: [PATCH 05/11] builtin/cat-file: support "blob:limit=" objects filter Implement support for the "blob:limit=" filter in git-cat-file(1), which causes us to omit all blobs whose size is at least the given limit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-cat-file.adoc | 5 +++++ builtin/cat-file.c | 15 ++++++++++++++- t/t1006-cat-file.sh | 18 +++++++++++++++--- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index afcdb0a473..48e05e1af5 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -91,6 +91,11 @@ OPTIONS printed at all. The '' may be one of the following: + The form '--filter=blob:none' omits all blobs. ++ +The form '--filter=blob:limit=[kmg]' omits blobs of size at least n +bytes or units. n may be zero. The suffixes k, m, and g can be used to name +units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same as +'blob:limit=1024'. 
--path=:: For use with `--textconv` or `--filters`, to allow specifying an object diff --git a/builtin/cat-file.c b/builtin/cat-file.c index bcceb646f8..629c6cddcb 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -483,8 +483,11 @@ static void batch_object_write(const char *obj_name, int ret; if (use_mailmap || - opt->objects_filter.choice == LOFC_BLOB_NONE) + opt->objects_filter.choice == LOFC_BLOB_NONE || + opt->objects_filter.choice == LOFC_BLOB_LIMIT) data->info.typep = &data->type; + if (opt->objects_filter.choice == LOFC_BLOB_LIMIT) + data->info.sizep = &data->size; if (pack) ret = packed_object_info(the_repository, pack, offset, @@ -509,6 +512,15 @@ static void batch_object_write(const char *obj_name, return; } break; + case LOFC_BLOB_LIMIT: + if (data->type == OBJ_BLOB && + data->size >= opt->objects_filter.blob_limit_value) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; default: BUG("unsupported objects filter"); } @@ -1049,6 +1061,7 @@ int cmd_cat_file(int argc, case LOFC_DISABLED: break; case LOFC_BLOB_NONE: + case LOFC_BLOB_LIMIT: if (!batch.enabled) usage(_("objects filter only supported in batch mode")); break; diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 7404c135b1..4f14840b71 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -1356,11 +1356,12 @@ test_expect_success PERL '--batch-command info is unbuffered by default' ' test_expect_success 'setup for objects filter' ' git init repo && ( - # Seed the repository with three different sets of objects: + # Seed the repository with four different sets of objects: # # - The first set is fully packed and has a bitmap. # - The second set is packed, but has no bitmap. # - The third set is loose. + # - The fourth set is loose and contains big objects. # # This ensures that we cover all these types as expected. 
cd repo && @@ -1368,7 +1369,14 @@ test_expect_success 'setup for objects filter' ' git repack -Adb && test_commit second && git repack -d && - test_commit third + test_commit third && + + for n in 1000 10000 + do + printf "%"$n"s" X >large.$n || return 1 + done && + git add large.* && + git commit -m fourth ) ' @@ -1380,7 +1388,7 @@ test_expect_success 'objects filter with unknown option' ' test_cmp expect err ' -for option in blob:limit=1 object:type=tag sparse:oid=1234 tree:1 sparse:path=x +for option in object:type=tag sparse:oid=1234 tree:1 sparse:path=x do test_expect_success "objects filter with unsupported option $option" ' case "$option" in @@ -1435,5 +1443,9 @@ test_objects_filter () { } test_objects_filter "blob:none" +test_objects_filter "blob:limit=1" +test_objects_filter "blob:limit=500" +test_objects_filter "blob:limit=1000" +test_objects_filter "blob:limit=1k" test_done From 8fa9fe171a43b10c47268b6508ad4f39f2f628d6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:41 +0200 Subject: [PATCH 06/11] builtin/cat-file: support "object:type=" objects filter Implement support for the "object:type=" filter in git-cat-file(1), which causes us to omit all objects that don't match the provided object type. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-cat-file.adoc | 3 +++ builtin/cat-file.c | 12 +++++++++++- t/t1006-cat-file.sh | 6 +++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index 48e05e1af5..74d71c3282 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -96,6 +96,9 @@ The form '--filter=blob:limit=[kmg]' omits blobs of size at least n bytes or units. n may be zero. The suffixes k, m, and g can be used to name units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same as 'blob:limit=1024'. 
++ +The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects which +are not of the requested type. --path=:: For use with `--textconv` or `--filters`, to allow specifying an object diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 629c6cddcb..0f17175a54 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -484,7 +484,8 @@ static void batch_object_write(const char *obj_name, if (use_mailmap || opt->objects_filter.choice == LOFC_BLOB_NONE || - opt->objects_filter.choice == LOFC_BLOB_LIMIT) + opt->objects_filter.choice == LOFC_BLOB_LIMIT || + opt->objects_filter.choice == LOFC_OBJECT_TYPE) data->info.typep = &data->type; if (opt->objects_filter.choice == LOFC_BLOB_LIMIT) data->info.sizep = &data->size; @@ -521,6 +522,14 @@ static void batch_object_write(const char *obj_name, return; } break; + case LOFC_OBJECT_TYPE: + if (data->type != opt->objects_filter.object_type) { + if (!opt->all_objects) + report_object_status(opt, obj_name, + &data->oid, "excluded"); + return; + } + break; default: BUG("unsupported objects filter"); } @@ -1062,6 +1071,7 @@ int cmd_cat_file(int argc, break; case LOFC_BLOB_NONE: case LOFC_BLOB_LIMIT: + case LOFC_OBJECT_TYPE: if (!batch.enabled) usage(_("objects filter only supported in batch mode")); break; diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 4f14840b71..98638fa2b9 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -1388,7 +1388,7 @@ test_expect_success 'objects filter with unknown option' ' test_cmp expect err ' -for option in object:type=tag sparse:oid=1234 tree:1 sparse:path=x +for option in sparse:oid=1234 tree:1 sparse:path=x do test_expect_success "objects filter with unsupported option $option" ' case "$option" in @@ -1447,5 +1447,9 @@ test_objects_filter "blob:limit=1" test_objects_filter "blob:limit=500" test_objects_filter "blob:limit=1000" test_objects_filter "blob:limit=1k" +test_objects_filter "object:type=blob" +test_objects_filter "object:type=commit" 
+test_objects_filter "object:type=tag" +test_objects_filter "object:type=tree" test_done From 3d45483846368796d12f62d7d15daaa59d9d8a5c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:42 +0200 Subject: [PATCH 07/11] pack-bitmap: allow passing payloads to `show_reachable_fn()` The `show_reachable_fn` callback is used by a couple of functions to present reachable objects to the caller. The function does not provide a way for the caller to pass a payload though, which is functionality that we'll require in a subsequent commit. Change the callback type to accept a payload and adapt all callsites accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 3 ++- builtin/rev-list.c | 3 ++- pack-bitmap.c | 15 ++++++++------- pack-bitmap.h | 3 ++- reachable.c | 3 ++- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index a7e4bb7904..38784613fc 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1736,7 +1736,8 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, static int add_object_entry_from_bitmap(const struct object_id *oid, enum object_type type, int flags UNUSED, uint32_t name_hash, - struct packed_git *pack, off_t offset) + struct packed_git *pack, off_t offset, + void *payload UNUSED) { display_progress(progress_state, ++nr_seen); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index bb26bee0d4..1100dd2abe 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -429,7 +429,8 @@ static int show_object_fast( int exclude UNUSED, uint32_t name_hash UNUSED, struct packed_git *found_pack UNUSED, - off_t found_offset UNUSED) + off_t found_offset UNUSED, + void *payload UNUSED) { fprintf(stdout, "%s\n", oid_to_hex(oid)); return 1; diff --git a/pack-bitmap.c b/pack-bitmap.c index 6f7fd94c36..d192fb87da 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1625,7 +1625,7 @@ static void 
show_extended_objects(struct bitmap_index *bitmap_git, (obj->type == OBJ_TAG && !revs->tag_objects)) continue; - show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0); + show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0, NULL); } } @@ -1663,7 +1663,8 @@ static void init_type_iterator(struct ewah_or_iterator *it, static void show_objects_for_type( struct bitmap_index *bitmap_git, enum object_type object_type, - show_reachable_fn show_reach) + show_reachable_fn show_reach, + void *payload) { size_t i = 0; uint32_t offset; @@ -1715,7 +1716,7 @@ static void show_objects_for_type( if (bitmap_git->hashes) hash = get_be32(bitmap_git->hashes + index_pos); - show_reach(&oid, object_type, 0, hash, pack, ofs); + show_reach(&oid, object_type, 0, hash, pack, ofs, payload); } } @@ -2518,13 +2519,13 @@ void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git, { assert(bitmap_git->result); - show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable); + show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable, NULL); if (revs->tree_objects) - show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable); + show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable, NULL); if (revs->blob_objects) - show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable); + show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable, NULL); if (revs->tag_objects) - show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable); + show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable, NULL); show_extended_objects(bitmap_git, revs, show_reachable); } diff --git a/pack-bitmap.h b/pack-bitmap.h index dd0951088f..de6bf534fe 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -50,7 +50,8 @@ typedef int (*show_reachable_fn)( int flags, uint32_t hash, struct packed_git *found_pack, - off_t found_offset); + off_t found_offset, + void *payload); struct bitmap_index; diff --git a/reachable.c b/reachable.c index 9ee04c89ec..421d354d3b 100644 --- a/reachable.c +++ 
b/reachable.c @@ -341,7 +341,8 @@ static int mark_object_seen(const struct object_id *oid, int exclude UNUSED, uint32_t name_hash UNUSED, struct packed_git *found_pack UNUSED, - off_t found_offset UNUSED) + off_t found_offset UNUSED, + void *payload UNUSED) { struct object *obj = lookup_object_by_type(the_repository, oid, type); if (!obj) From 5420901bde8043a298b8ae5e5b3c3cfc1b67039b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:43 +0200 Subject: [PATCH 08/11] pack-bitmap: add function to iterate over filtered bitmapped objects Introduce a function that allows the caller to iterate over all bitmapped objects that match a given filter. This mechanism will be used in a subsequent commit to optimize object filters in git-cat-file(1). Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pack-bitmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------ pack-bitmap.h | 12 +++++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index d192fb87da..6adb8aaa1c 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1662,6 +1662,7 @@ static void init_type_iterator(struct ewah_or_iterator *it, static void show_objects_for_type( struct bitmap_index *bitmap_git, + struct bitmap *objects, enum object_type object_type, show_reachable_fn show_reach, void *payload) @@ -1672,8 +1673,6 @@ static void show_objects_for_type( struct ewah_or_iterator it; eword_t filter; - struct bitmap *objects = bitmap_git->result; - init_type_iterator(&it, bitmap_git, object_type); for (i = 0; i < objects->word_alloc && @@ -2025,6 +2024,50 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git, } } +int for_each_bitmapped_object(struct bitmap_index *bitmap_git, + struct list_objects_filter_options *filter, + show_reachable_fn show_reach, + void *payload) +{ + struct bitmap *filtered_bitmap = NULL; + uint32_t objects_nr; + size_t full_word_count; + int ret; + + if 
(!can_filter_bitmap(filter)) { + ret = -1; + goto out; + } + + objects_nr = bitmap_num_objects(bitmap_git); + full_word_count = objects_nr / BITS_IN_EWORD; + + /* We start from the all-1 bitmap and then filter down from there. */ + filtered_bitmap = bitmap_word_alloc(full_word_count + !!(objects_nr % BITS_IN_EWORD)); + memset(filtered_bitmap->words, 0xff, full_word_count * sizeof(*filtered_bitmap->words)); + for (size_t i = full_word_count * BITS_IN_EWORD; i < objects_nr; i++) + bitmap_set(filtered_bitmap, i); + + if (filter_bitmap(bitmap_git, NULL, filtered_bitmap, filter) < 0) { + ret = -1; + goto out; + } + + show_objects_for_type(bitmap_git, filtered_bitmap, + OBJ_COMMIT, show_reach, payload); + show_objects_for_type(bitmap_git, filtered_bitmap, + OBJ_TREE, show_reach, payload); + show_objects_for_type(bitmap_git, filtered_bitmap, + OBJ_BLOB, show_reach, payload); + show_objects_for_type(bitmap_git, filtered_bitmap, + OBJ_TAG, show_reach, payload); + + ret = 0; +out: + bitmap_free(filtered_bitmap); + return ret; +} + struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects) { @@ -2519,13 +2562,17 @@ void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git, { assert(bitmap_git->result); - show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable, NULL); + show_objects_for_type(bitmap_git, bitmap_git->result, + OBJ_COMMIT, show_reachable, NULL); if (revs->tree_objects) - show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable, NULL); + show_objects_for_type(bitmap_git, bitmap_git->result, + OBJ_TREE, show_reachable, NULL); if (revs->blob_objects) - show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable, NULL); + show_objects_for_type(bitmap_git, bitmap_git->result, + OBJ_BLOB, show_reachable, NULL); if (revs->tag_objects) - show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable, NULL); + show_objects_for_type(bitmap_git, bitmap_git->result, + OBJ_TAG, show_reachable, NULL); show_extended_objects(bitmap_git, 
revs, show_reachable); } diff --git a/pack-bitmap.h b/pack-bitmap.h index de6bf534fe..079bae3246 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -79,6 +79,18 @@ int test_bitmap_pseudo_merges(struct repository *r); int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n); int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n); +struct list_objects_filter_options; + +/* + * Filter bitmapped objects and iterate through all resulting objects, + * executing `show_reach` for each of them. Returns `-1` in case the filter is + * not supported, `0` otherwise. + */ +int for_each_bitmapped_object(struct bitmap_index *bitmap_git, + struct list_objects_filter_options *filter, + show_reachable_fn show_reach, + void *payload); + #define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \ "GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL" From c9b94a7785b4de4e3e15d0e5b65c97337c206b81 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:44 +0200 Subject: [PATCH 09/11] pack-bitmap: introduce function to check whether a pack is bitmapped Introduce a function that allows us to verify whether a pack is bitmapped or not. This functionality will be used in a subsequent commit. 
Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pack-bitmap.c | 15 +++++++++++++++ pack-bitmap.h | 7 +++++++ 2 files changed, 22 insertions(+) diff --git a/pack-bitmap.c b/pack-bitmap.c index 6adb8aaa1c..edc8f42122 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -745,6 +745,21 @@ struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx) return NULL; } +int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack) +{ + for (; bitmap; bitmap = bitmap->base) { + if (bitmap_is_midx(bitmap)) { + for (size_t i = 0; i < bitmap->midx->num_packs; i++) + if (bitmap->midx->packs[i] == pack) + return 1; + } else if (bitmap->pack == pack) { + return 1; + } + } + + return 0; +} + struct include_data { struct bitmap_index *bitmap_git; struct bitmap *base; diff --git a/pack-bitmap.h b/pack-bitmap.h index 079bae3246..55df1b3af5 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -67,6 +67,13 @@ struct bitmapped_pack { struct bitmap_index *prepare_bitmap_git(struct repository *r); struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx); + +/* + * Given a bitmap index, determine whether it contains the pack either directly + * or via the multi-pack-index. + */ +int bitmap_index_contains_pack(struct bitmap_index *bitmap, struct packed_git *pack); + void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits, uint32_t *trees, uint32_t *blobs, uint32_t *tags); void traverse_bitmap_commit_list(struct bitmap_index *, From d5ec7027bcbf755b95fba84ad1de50ca6d3c3daa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:45 +0200 Subject: [PATCH 10/11] builtin/cat-file: deduplicate logic to iterate over all objects Pull out a common function that allows us to iterate over all objects in a repository. Right now the logic is trivial and would only require two function calls, making this refactoring a bit pointless. 
But in the next commit we will iterate on this logic to make use of bitmaps, so this is about to become a bit more complex. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 85 ++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 0f17175a54..b0c758eca0 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -642,25 +642,18 @@ static int batch_object_cb(const struct object_id *oid, void *vdata) return 0; } -static int collect_loose_object(const struct object_id *oid, - const char *path UNUSED, - void *data) -{ - oid_array_append(data, oid); - return 0; -} - -static int collect_packed_object(const struct object_id *oid, - struct packed_git *pack UNUSED, - uint32_t pos UNUSED, - void *data) +static int collect_object(const struct object_id *oid, + struct packed_git *pack UNUSED, + off_t offset UNUSED, + void *data) { oid_array_append(data, oid); return 0; } static int batch_unordered_object(const struct object_id *oid, - struct packed_git *pack, off_t offset, + struct packed_git *pack, + off_t offset, void *vdata) { struct object_cb_data *data = vdata; @@ -674,23 +667,6 @@ static int batch_unordered_object(const struct object_id *oid, return 0; } -static int batch_unordered_loose(const struct object_id *oid, - const char *path UNUSED, - void *data) -{ - return batch_unordered_object(oid, NULL, 0, data); -} - -static int batch_unordered_packed(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data) -{ - return batch_unordered_object(oid, pack, - nth_packed_object_offset(pack, pos), - data); -} - typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *, struct strbuf *, struct expand_data *); @@ -823,6 +799,45 @@ static void batch_objects_command(struct batch_options *opt, #define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)" +typedef int (*for_each_object_fn)(const struct 
object_id *oid, struct packed_git *pack, + off_t offset, void *data); + +struct for_each_object_payload { + for_each_object_fn callback; + void *payload; +}; + +static int batch_one_object_loose(const struct object_id *oid, + const char *path UNUSED, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, NULL, 0, payload->payload); +} + +static int batch_one_object_packed(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, pack, nth_packed_object_offset(pack, pos), + payload->payload); +} + +static void batch_each_object(for_each_object_fn callback, + unsigned flags, + void *_payload) +{ + struct for_each_object_payload payload = { + .callback = callback, + .payload = _payload, + }; + for_each_loose_object(batch_one_object_loose, &payload, 0); + for_each_packed_object(the_repository, batch_one_object_packed, + &payload, flags); +} + static int batch_objects(struct batch_options *opt) { struct strbuf input = STRBUF_INIT; @@ -877,18 +892,14 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; - for_each_loose_object(batch_unordered_loose, &cb, 0); - for_each_packed_object(the_repository, batch_unordered_packed, - &cb, FOR_EACH_OBJECT_PACK_ORDER); + batch_each_object(batch_unordered_object, + FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { struct oid_array sa = OID_ARRAY_INIT; - for_each_loose_object(collect_loose_object, &sa, 0); - for_each_packed_object(the_repository, collect_packed_object, - &sa, 0); - + batch_each_object(collect_object, 0, &sa); oid_array_for_each_unique(&sa, batch_object_cb, &cb); oid_array_clear(&sa); From 8002e8ee1829f0c727aa2f7d9c18ad706cb63565 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 2 Apr 2025 13:13:46 +0200 Subject: [PATCH 11/11] builtin/cat-file: use bitmaps to efficiently filter by object type MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it is now possible to filter objects by type, this mechanism is for now mostly a convenience. Most importantly, we still have to iterate through the whole packfile to find all objects of a specific type. This can be prohibitively expensive depending on the size of the packfiles. It isn't really possible to do better than this when only considering a packfile itself, as the order of objects is not fixed. But when we have a packfile with a corresponding bitmap, either because the packfile itself has one or because the multi-pack index has a bitmap for it, then we can use these bitmaps to improve the runtime. While bitmaps are typically used to compute reachability of objects, they also contain one bitmap per object type that encodes which object has what type. So instead of reading through the whole packfile(s), we can use the bitmaps and iterate through the type-specific bitmap. Typically, only a subset of packfiles will have a bitmap. But this isn't really much of a problem: we can use bitmaps when available, and then use the non-bitmap walk for every packfile that isn't covered by one. Overall, this leads to quite a significant speedup depending on how many objects of a certain type exist. The following benchmarks have been executed in the Chromium repository, which has a 50GB packfile with almost 25 million objects. 
As expected, there isn't really much of a change in performance without an object filter:

    Benchmark 1: cat-file with no-filter (revision = HEAD~)
      Time (mean ± σ): 89.675 s ± 4.527 s [User: 40.807 s, System: 10.782 s]
      Range (min … max): 83.052 s … 96.084 s 10 runs

    Benchmark 2: cat-file with no-filter (revision = HEAD)
      Time (mean ± σ): 88.991 s ± 2.488 s [User: 42.278 s, System: 10.305 s]
      Range (min … max): 82.843 s … 91.271 s 10 runs

    Summary
      cat-file with no-filter (revision = HEAD) ran
        1.01 ± 0.06 times faster than cat-file with no-filter (revision = HEAD~)

We still have to scan through all objects as we yield all of them, so using the bitmap in this case doesn't really buy us anything. What is noticeable in this benchmark is that we're I/O-bound, not CPU-bound, as can be seen from the user/system runtimes, which combined are way lower than the overall benchmarked runtime.

But when we do use a filter we can see a significant improvement:

    Benchmark 1: cat-file with filter=object:type=commit (revision = HEAD~)
      Time (mean ± σ): 86.444 s ± 4.081 s [User: 36.830 s, System: 11.312 s]
      Range (min … max): 80.305 s … 93.104 s 10 runs

    Benchmark 2: cat-file with filter=object:type=commit (revision = HEAD)
      Time (mean ± σ): 2.089 s ± 0.015 s [User: 1.872 s, System: 0.207 s]
      Range (min … max): 2.073 s … 2.119 s 10 runs

    Summary
      cat-file with filter=object:type=commit (revision = HEAD) ran
        41.38 ± 1.98 times faster than cat-file with filter=object:type=commit (revision = HEAD~)

This is because we don't have to scan through all packfiles anymore, but can instead directly look up relevant objects.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b0c758eca0..ead7554a57 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -21,6 +21,7 @@ #include "streaming.h" #include "oid-array.h" #include "packfile.h" +#include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "object-store-ll.h" @@ -825,7 +826,20 @@ static int batch_one_object_packed(const struct object_id *oid, payload->payload); } -static void batch_each_object(for_each_object_fn callback, +static int batch_one_object_bitmapped(const struct object_id *oid, + enum object_type type UNUSED, + int flags UNUSED, + uint32_t hash UNUSED, + struct packed_git *pack, + off_t offset, + void *_payload) +{ + struct for_each_object_payload *payload = _payload; + return payload->callback(oid, pack, offset, payload->payload); +} + +static void batch_each_object(struct batch_options *opt, + for_each_object_fn callback, unsigned flags, void *_payload) { @@ -833,9 +847,27 @@ static void batch_each_object(for_each_object_fn callback, .callback = callback, .payload = _payload, }; + struct bitmap_index *bitmap = prepare_bitmap_git(the_repository); + for_each_loose_object(batch_one_object_loose, &payload, 0); - for_each_packed_object(the_repository, batch_one_object_packed, - &payload, flags); + + if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter, + batch_one_object_bitmapped, &payload)) { + struct packed_git *pack; + + for (pack = get_all_packs(the_repository); pack; pack = pack->next) { + if (bitmap_index_contains_pack(bitmap, pack) || + open_pack_index(pack)) + continue; + for_each_object_in_pack(pack, batch_one_object_packed, + &payload, flags); + } + } else { + for_each_packed_object(the_repository, batch_one_object_packed, + &payload, flags); + } + + free_bitmap_index(bitmap); } 
static int batch_objects(struct batch_options *opt) @@ -892,14 +924,14 @@ static int batch_objects(struct batch_options *opt) cb.seen = &seen; - batch_each_object(batch_unordered_object, + batch_each_object(opt, batch_unordered_object, FOR_EACH_OBJECT_PACK_ORDER, &cb); oidset_clear(&seen); } else { struct oid_array sa = OID_ARRAY_INIT; - batch_each_object(collect_object, 0, &sa); + batch_each_object(opt, collect_object, 0, &sa); oid_array_for_each_unique(&sa, batch_object_cb, &cb); oid_array_clear(&sa);