mirror of
https://github.com/git-for-windows/git.git
synced 2026-02-28 09:18:06 -06:00
pack-objects: add --path-walk option
In order to more easily compute delta bases among objects that appear at the exact same path, add a --path-walk option to 'git pack-objects'. This option will use the path-walk API instead of the object walk given by the revision machinery. Since objects will be provided in batches representing a common path, those objects can be tested for delta bases immediately instead of waiting for a sort of the full object list by name-hash. This has multiple benefits, including avoiding collisions by name-hash. The objects marked as UNINTERESTING are included in these batches, so we are guaranteeing some locality to find good delta bases. After the individual passes are done on a per-path basis, the default name-hash is used to find other opportunistic delta bases that did not match exactly by the full path name. RFC TODO: It is important to note that this option is inherently incompatible with using a bitmap index. This walk probably also does not work with other advanced features, such as delta islands. Getting ahead of myself, this option compares well with --full-name-hash when the packfile is large enough, but also performs at least as well as the default in all cases that I've seen. RFC TODO: this should probably be recording the batch locations to another list so they could be processed in a second phase using threads. RFC TODO: list some examples of how this outperforms previous pack-objects strategies. (This is coming in later commits that include performance test changes.) Signed-off-by: Derrick Stolee <stolee@gmail.com>
This commit is contained in:
parent
2ebf07375b
commit
7f4a2a2d9e
@ -16,7 +16,7 @@ SYNOPSIS
|
||||
[--cruft] [--cruft-expiration=<time>]
|
||||
[--stdout [--filter=<filter-spec>] | <base-name>]
|
||||
[--shallow] [--keep-true-parents] [--[no-]sparse]
|
||||
[--name-hash-version=<n>] < <object-list>
|
||||
[--name-hash-version=<n>] [--path-walk] < <object-list>
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
@ -375,6 +375,16 @@ many different directories. At the moment, this version is not allowed
|
||||
when writing reachability bitmap files with `--write-bitmap-index` and it
|
||||
will be automatically changed to version `1`.
|
||||
|
||||
--path-walk::
|
||||
By default, `git pack-objects` walks objects in an order that
|
||||
presents trees and blobs in an order unrelated to the path they
|
||||
appear relative to a commit's root tree. The `--path-walk` option
|
||||
enables a different walking algorithm that organizes trees and
|
||||
blobs by path. This has the potential to improve delta compression
|
||||
especially in the presence of filenames that cause collisions in
|
||||
Git's default name-hash algorithm. Due to changing how the objects
|
||||
are walked, this option is not compatible with `--delta-islands`,
|
||||
`--shallow`, or `--filter`.
|
||||
|
||||
DELTA ISLANDS
|
||||
-------------
|
||||
|
||||
@ -70,3 +70,4 @@ Examples
|
||||
See example usages in:
|
||||
`t/helper/test-path-walk.c`,
|
||||
`builtin/backfill.c`
|
||||
`builtin/pack-objects.c`
|
||||
|
||||
@ -41,6 +41,9 @@
|
||||
#include "promisor-remote.h"
|
||||
#include "pack-mtimes.h"
|
||||
#include "parse-options.h"
|
||||
#include "blob.h"
|
||||
#include "tree.h"
|
||||
#include "path-walk.h"
|
||||
|
||||
/*
|
||||
* Objects we are going to pack are collected in the `to_pack` structure.
|
||||
@ -217,6 +220,7 @@ static int delta_search_threads;
|
||||
static int pack_to_stdout;
|
||||
static int sparse;
|
||||
static int thin;
|
||||
static int path_walk;
|
||||
static int num_preferred_base;
|
||||
static struct progress *progress_state;
|
||||
|
||||
@ -4188,6 +4192,105 @@ static void mark_bitmap_preferred_tips(void)
|
||||
}
|
||||
}
|
||||
|
||||
static inline int is_oid_interesting(struct repository *repo,
|
||||
struct object_id *oid)
|
||||
{
|
||||
struct object *o = lookup_object(repo, oid);
|
||||
return o && !(o->flags & UNINTERESTING);
|
||||
}
|
||||
|
||||
static int add_objects_by_path(const char *path,
|
||||
struct oid_array *oids,
|
||||
enum object_type type,
|
||||
void *data)
|
||||
{
|
||||
struct object_entry **delta_list;
|
||||
size_t oe_start = to_pack.nr_objects;
|
||||
size_t oe_end;
|
||||
unsigned int sub_list_size;
|
||||
unsigned int *processed = data;
|
||||
|
||||
/*
|
||||
* First, add all objects to the packing data, including the ones
|
||||
* marked UNINTERESTING (translated to 'exclude') as they can be
|
||||
* used as delta bases.
|
||||
*/
|
||||
for (size_t i = 0; i < oids->nr; i++) {
|
||||
int exclude;
|
||||
struct object_info oi = OBJECT_INFO_INIT;
|
||||
struct object_id *oid = &oids->oid[i];
|
||||
|
||||
/* Skip objects that do not exist locally. */
|
||||
if (exclude_promisor_objects &&
|
||||
oid_object_info_extended(the_repository, oid, &oi,
|
||||
OBJECT_INFO_FOR_PREFETCH) < 0)
|
||||
continue;
|
||||
|
||||
exclude = !is_oid_interesting(the_repository, oid);
|
||||
|
||||
if (exclude && !thin)
|
||||
continue;
|
||||
|
||||
add_object_entry(oid, type, path, exclude);
|
||||
}
|
||||
|
||||
oe_end = to_pack.nr_objects;
|
||||
|
||||
/* We can skip delta calculations if it is a no-op. */
|
||||
if (oe_end == oe_start || !window)
|
||||
return 0;
|
||||
|
||||
sub_list_size = 0;
|
||||
ALLOC_ARRAY(delta_list, oe_end - oe_start);
|
||||
|
||||
for (size_t i = 0; i < oe_end - oe_start; i++) {
|
||||
struct object_entry *entry = to_pack.objects + oe_start + i;
|
||||
|
||||
if (!should_attempt_deltas(entry))
|
||||
continue;
|
||||
|
||||
delta_list[sub_list_size++] = entry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find delta bases among this list of objects that all match the same
|
||||
* path. This causes the delta compression to be interleaved in the
|
||||
* object walk, which can lead to confusing progress indicators. This is
|
||||
* also incompatible with threaded delta calculations. In the future,
|
||||
* consider creating a list of regions in the full to_pack.objects array
|
||||
* that could be picked up by the threaded delta computation.
|
||||
*/
|
||||
if (sub_list_size && window) {
|
||||
QSORT(delta_list, sub_list_size, type_size_sort);
|
||||
find_deltas(delta_list, &sub_list_size, window, depth, processed);
|
||||
}
|
||||
|
||||
free(delta_list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void get_object_list_path_walk(struct rev_info *revs)
|
||||
{
|
||||
struct path_walk_info info = PATH_WALK_INFO_INIT;
|
||||
unsigned int processed = 0;
|
||||
|
||||
info.revs = revs;
|
||||
info.path_fn = add_objects_by_path;
|
||||
info.path_fn_data = &processed;
|
||||
revs->tag_objects = 1;
|
||||
|
||||
/*
|
||||
* Allow the --[no-]sparse option to be interesting here, if only
|
||||
* for testing purposes. Paths with no interesting objects will not
|
||||
* contribute to the resulting pack, but only create noisy preferred
|
||||
* base objects.
|
||||
*/
|
||||
info.prune_all_uninteresting = sparse;
|
||||
|
||||
if (walk_objects_by_path(&info))
|
||||
die(_("failed to pack objects via path-walk"));
|
||||
}
|
||||
|
||||
static void get_object_list(struct rev_info *revs, int ac, const char **av)
|
||||
{
|
||||
struct setup_revision_opt s_r_opt = {
|
||||
@ -4234,7 +4337,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
|
||||
|
||||
warn_on_object_refname_ambiguity = save_warning;
|
||||
|
||||
if (use_bitmap_index && !get_object_list_from_bitmap(revs))
|
||||
if (use_bitmap_index && !path_walk && !get_object_list_from_bitmap(revs))
|
||||
return;
|
||||
|
||||
if (use_delta_islands)
|
||||
@ -4243,15 +4346,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
|
||||
if (write_bitmap_index)
|
||||
mark_bitmap_preferred_tips();
|
||||
|
||||
if (prepare_revision_walk(revs))
|
||||
die(_("revision walk setup failed"));
|
||||
mark_edges_uninteresting(revs, show_edge, sparse);
|
||||
|
||||
if (!fn_show_object)
|
||||
fn_show_object = show_object;
|
||||
traverse_commit_list(revs,
|
||||
show_commit, fn_show_object,
|
||||
NULL);
|
||||
|
||||
if (path_walk) {
|
||||
get_object_list_path_walk(revs);
|
||||
} else {
|
||||
if (prepare_revision_walk(revs))
|
||||
die(_("revision walk setup failed"));
|
||||
mark_edges_uninteresting(revs, show_edge, sparse);
|
||||
traverse_commit_list(revs,
|
||||
show_commit, fn_show_object,
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (unpack_unreachable_expiration) {
|
||||
revs->ignore_missing_links = 1;
|
||||
@ -4461,6 +4568,8 @@ int cmd_pack_objects(int argc,
|
||||
N_("use the sparse reachability algorithm")),
|
||||
OPT_BOOL(0, "thin", &thin,
|
||||
N_("create thin packs")),
|
||||
OPT_BOOL(0, "path-walk", &path_walk,
|
||||
N_("use the path-walk API to walk objects when possible")),
|
||||
OPT_BOOL(0, "shallow", &shallow,
|
||||
N_("create packs suitable for shallow fetches")),
|
||||
OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
|
||||
@ -4546,7 +4655,27 @@ int cmd_pack_objects(int argc,
|
||||
window = 0;
|
||||
|
||||
strvec_push(&rp, "pack-objects");
|
||||
if (thin) {
|
||||
|
||||
if (path_walk && filter_options.choice) {
|
||||
warning(_("cannot use --filter with --path-walk"));
|
||||
path_walk = 0;
|
||||
}
|
||||
if (path_walk && use_delta_islands) {
|
||||
warning(_("cannot use delta islands with --path-walk"));
|
||||
path_walk = 0;
|
||||
}
|
||||
if (path_walk && shallow) {
|
||||
warning(_("cannot use --shallow with --path-walk"));
|
||||
path_walk = 0;
|
||||
}
|
||||
if (path_walk) {
|
||||
strvec_push(&rp, "--boundary");
|
||||
/*
|
||||
* We must disable the bitmaps because we are removing
|
||||
* the --objects / --objects-edge[-aggressive] options.
|
||||
*/
|
||||
use_bitmap_index = 0;
|
||||
} else if (thin) {
|
||||
use_internal_rev_list = 1;
|
||||
strvec_push(&rp, shallow
|
||||
? "--objects-edge-aggressive"
|
||||
|
||||
@ -723,4 +723,21 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat
|
||||
! test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err
|
||||
'
|
||||
|
||||
# Basic "repack everything" test
|
||||
test_expect_success '--path-walk pack everything' '
|
||||
git -C server rev-parse HEAD >in &&
|
||||
git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
|
||||
git -C server index-pack --stdin <out.pack
|
||||
'
|
||||
|
||||
# Basic "thin pack" test
|
||||
test_expect_success '--path-walk thin pack' '
|
||||
cat >in <<-EOF &&
|
||||
$(git -C server rev-parse HEAD)
|
||||
^$(git -C server rev-parse HEAD~2)
|
||||
EOF
|
||||
git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
|
||||
git -C server index-pack --fix-thin --stdin <out.pack
|
||||
'
|
||||
|
||||
test_done
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user