mirror of
https://github.com/git-for-windows/git.git
synced 2026-05-30 08:08:35 -05:00
survey: add report of "largest" paths
Since we are already walking our reachable objects using the path-walk API, let's now collect lists of the paths that contribute most to different metrics. Specifically, we care about * Number of versions. * Total size on disk. * Total inflated size (no delta or zlib compression). This information can be critical to discovering which parts of the repository are causing the most growth, especially on-disk size. Different packing strategies might help compress data more efficiently, but the total inflated size is a representation of the raw size of all snapshots of those paths. Even when stored efficiently on disk, that size represents how much information must be processed to complete a command such as 'git blame'. Since the on-disk size is likely to be fragile, stop testing the exact output of 'git survey' and check that the correct set of headers is output. Signed-off-by: Derrick Stolee <stolee@gmail.com>
This commit is contained in:
committed by
Johannes Schindelin
parent
2c1b1631d5
commit
64b2ec2251
@@ -75,7 +75,6 @@ struct survey_report_object_size_summary {
|
||||
|
||||
typedef int (*survey_top_cmp)(void *v1, void *v2);
|
||||
|
||||
MAYBE_UNUSED
|
||||
static int cmp_by_nr(void *v1, void *v2)
|
||||
{
|
||||
struct survey_report_object_size_summary *s1 = v1;
|
||||
@@ -88,7 +87,6 @@ static int cmp_by_nr(void *v1, void *v2)
|
||||
return 0;
|
||||
}
|
||||
|
||||
MAYBE_UNUSED
|
||||
static int cmp_by_disk_size(void *v1, void *v2)
|
||||
{
|
||||
struct survey_report_object_size_summary *s1 = v1;
|
||||
@@ -101,7 +99,6 @@ static int cmp_by_disk_size(void *v1, void *v2)
|
||||
return 0;
|
||||
}
|
||||
|
||||
MAYBE_UNUSED
|
||||
static int cmp_by_inflated_size(void *v1, void *v2)
|
||||
{
|
||||
struct survey_report_object_size_summary *s1 = v1;
|
||||
@@ -132,7 +129,6 @@ struct survey_report_top_table {
|
||||
void *data;
|
||||
};
|
||||
|
||||
MAYBE_UNUSED
|
||||
static void init_top_sizes(struct survey_report_top_table *top,
|
||||
size_t limit, const char *name,
|
||||
survey_top_cmp cmp)
|
||||
@@ -158,7 +154,6 @@ static void clear_top_sizes(struct survey_report_top_table *top)
|
||||
free(sz_array);
|
||||
}
|
||||
|
||||
MAYBE_UNUSED
|
||||
static void maybe_insert_into_top_size(struct survey_report_top_table *top,
|
||||
struct survey_report_object_size_summary *summary)
|
||||
{
|
||||
@@ -195,6 +190,10 @@ struct survey_report {
|
||||
struct survey_report_object_summary reachable_objects;
|
||||
|
||||
struct survey_report_object_size_summary *by_type;
|
||||
|
||||
struct survey_report_top_table *top_paths_by_count;
|
||||
struct survey_report_top_table *top_paths_by_disk;
|
||||
struct survey_report_top_table *top_paths_by_inflate;
|
||||
};
|
||||
|
||||
#define REPORT_TYPE_COMMIT 0
|
||||
@@ -446,6 +445,13 @@ static void survey_report_object_sizes(const char *title,
|
||||
clear_table(&table);
|
||||
}
|
||||
|
||||
/*
 * Print one "top paths" table in the plaintext report.
 *
 * Thin wrapper around survey_report_object_sizes(): the table's own
 * name is passed as the title, the translated string "Path" as the
 * second argument (presumably the label-column header; confirm against
 * survey_report_object_sizes()), and the table's data/nr pair as the
 * rows to print.
 */
static void survey_report_plaintext_sorted_size(
|
||||
struct survey_report_top_table *top)
|
||||
{
|
||||
survey_report_object_sizes(top->name, _("Path"),
|
||||
top->data, top->nr);
|
||||
}
|
||||
|
||||
static void survey_report_plaintext(struct survey_context *ctx)
|
||||
{
|
||||
printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree);
|
||||
@@ -456,6 +462,21 @@ static void survey_report_plaintext(struct survey_context *ctx)
|
||||
_("Object Type"),
|
||||
ctx->report.by_type,
|
||||
REPORT_TYPE_COUNT);
|
||||
|
||||
survey_report_plaintext_sorted_size(
|
||||
&ctx->report.top_paths_by_count[REPORT_TYPE_TREE]);
|
||||
survey_report_plaintext_sorted_size(
|
||||
&ctx->report.top_paths_by_count[REPORT_TYPE_BLOB]);
|
||||
|
||||
survey_report_plaintext_sorted_size(
|
||||
&ctx->report.top_paths_by_disk[REPORT_TYPE_TREE]);
|
||||
survey_report_plaintext_sorted_size(
|
||||
&ctx->report.top_paths_by_disk[REPORT_TYPE_BLOB]);
|
||||
|
||||
survey_report_plaintext_sorted_size(
|
||||
&ctx->report.top_paths_by_inflate[REPORT_TYPE_TREE]);
|
||||
survey_report_plaintext_sorted_size(
|
||||
&ctx->report.top_paths_by_inflate[REPORT_TYPE_BLOB]);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -696,7 +717,8 @@ static void increment_totals(struct survey_context *ctx,
|
||||
|
||||
static void increment_object_totals(struct survey_context *ctx,
|
||||
struct oid_array *oids,
|
||||
enum object_type type)
|
||||
enum object_type type,
|
||||
const char *path)
|
||||
{
|
||||
struct survey_report_object_size_summary *total;
|
||||
struct survey_report_object_size_summary summary = { 0 };
|
||||
@@ -728,6 +750,27 @@ static void increment_object_totals(struct survey_context *ctx,
|
||||
total->disk_size += summary.disk_size;
|
||||
total->inflated_size += summary.inflated_size;
|
||||
total->num_missing += summary.num_missing;
|
||||
|
||||
if (type == OBJ_TREE || type == OBJ_BLOB) {
|
||||
int index = type == OBJ_TREE ?
|
||||
REPORT_TYPE_TREE : REPORT_TYPE_BLOB;
|
||||
struct survey_report_top_table *top;
|
||||
|
||||
/*
|
||||
* Temporarily store (const char *) here, but it will
|
||||
* be duped if inserted and will not be freed.
|
||||
*/
|
||||
summary.label = (char *)path;
|
||||
|
||||
top = ctx->report.top_paths_by_count;
|
||||
maybe_insert_into_top_size(&top[index], &summary);
|
||||
|
||||
top = ctx->report.top_paths_by_disk;
|
||||
maybe_insert_into_top_size(&top[index], &summary);
|
||||
|
||||
top = ctx->report.top_paths_by_inflate;
|
||||
maybe_insert_into_top_size(&top[index], &summary);
|
||||
}
|
||||
}
|
||||
|
||||
static int survey_objects_path_walk_fn(const char *path,
|
||||
@@ -739,7 +782,7 @@ static int survey_objects_path_walk_fn(const char *path,
|
||||
|
||||
increment_object_counts(&ctx->report.reachable_objects,
|
||||
type, oids->nr);
|
||||
increment_object_totals(ctx, oids, type);
|
||||
increment_object_totals(ctx, oids, type, path);
|
||||
|
||||
ctx->progress_nr += oids->nr;
|
||||
display_progress(ctx->progress, ctx->progress_nr);
|
||||
@@ -749,11 +792,31 @@ static int survey_objects_path_walk_fn(const char *path,
|
||||
|
||||
/*
 * Allocate and label the report tables stored in ctx->report.
 *
 * - by_type gets REPORT_TYPE_COUNT entries, with translated labels for
 *   commits, trees, blobs and tags.
 * - top_paths_by_count, top_paths_by_disk and top_paths_by_inflate each
 *   get REPORT_TYPE_COUNT entries, but only the REPORT_TYPE_TREE and
 *   REPORT_TYPE_BLOB slots are passed to init_top_sizes(); the other
 *   slots are left exactly as CALLOC_ARRAY() produced them.
 */
static void initialize_report(struct survey_context *ctx)
|
||||
{
|
||||
/* Limit handed to every init_top_sizes() call below. */
const int top_limit = 100;
|
||||
|
||||
CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT);
|
||||
ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits"));
|
||||
ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees"));
|
||||
ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs"));
|
||||
ctx->report.by_type[REPORT_TYPE_TAG].label = xstrdup(_("Tags"));
|
||||
|
||||
/* Tables compared with cmp_by_nr. */
CALLOC_ARRAY(ctx->report.top_paths_by_count, REPORT_TYPE_COUNT);
|
||||
init_top_sizes(&ctx->report.top_paths_by_count[REPORT_TYPE_TREE],
|
||||
top_limit, _("TOP DIRECTORIES BY COUNT"), cmp_by_nr);
|
||||
init_top_sizes(&ctx->report.top_paths_by_count[REPORT_TYPE_BLOB],
|
||||
top_limit, _("TOP FILES BY COUNT"), cmp_by_nr);
|
||||
|
||||
/* Tables compared with cmp_by_disk_size. */
CALLOC_ARRAY(ctx->report.top_paths_by_disk, REPORT_TYPE_COUNT);
|
||||
init_top_sizes(&ctx->report.top_paths_by_disk[REPORT_TYPE_TREE],
|
||||
top_limit, _("TOP DIRECTORIES BY DISK SIZE"), cmp_by_disk_size);
|
||||
init_top_sizes(&ctx->report.top_paths_by_disk[REPORT_TYPE_BLOB],
|
||||
top_limit, _("TOP FILES BY DISK SIZE"), cmp_by_disk_size);
|
||||
|
||||
/* Tables compared with cmp_by_inflated_size. */
CALLOC_ARRAY(ctx->report.top_paths_by_inflate, REPORT_TYPE_COUNT);
|
||||
init_top_sizes(&ctx->report.top_paths_by_inflate[REPORT_TYPE_TREE],
|
||||
top_limit, _("TOP DIRECTORIES BY INFLATED SIZE"), cmp_by_inflated_size);
|
||||
init_top_sizes(&ctx->report.top_paths_by_inflate[REPORT_TYPE_BLOB],
|
||||
top_limit, _("TOP FILES BY INFLATED SIZE"), cmp_by_inflated_size);
|
||||
}
|
||||
|
||||
static void survey_phase_objects(struct survey_context *ctx)
|
||||
|
||||
@@ -86,7 +86,17 @@ test_expect_success 'git survey (default)' '
|
||||
Tags | 4 | $(test_oid tags_size_on_disk) | $(test_oid tags_size)
|
||||
EOF
|
||||
|
||||
test_cmp expect out
|
||||
lines=$(wc -l <expect) &&
|
||||
head -n $lines out >out-trimmed &&
|
||||
test_cmp expect out-trimmed &&
|
||||
|
||||
for type in "DIRECTORIES" "FILES"
|
||||
do
|
||||
for metric in "COUNT" "DISK SIZE" "INFLATED SIZE"
|
||||
do
|
||||
grep "TOP $type BY $metric" out || return 1
|
||||
done || return 1
|
||||
done
|
||||
'
|
||||
|
||||
test_done
|
||||
|
||||
Reference in New Issue
Block a user