mirror of
https://github.com/git-for-windows/git.git
synced 2026-02-03 18:59:59 -06:00
Since we are already walking our reachable objects using the path-walk API,
let's now collect lists of the paths that contribute most to different
metrics. Specifically, we care about
* Number of versions.
* Total size on disk.
* Total inflated size (no delta or zlib compression).
This information can be critical to discovering which parts of the
repository are causing the most growth, especially on-disk size. Different
packing strategies might help compress data more efficiently, but the total
inflated size is a representation of the raw size of all snapshots of those
paths. Even when stored efficiently on disk, that size represents how much
information must be processed to complete a command such as 'git blame'.
The exact disk size seems to be not quite robust enough for testing, as
could be seen by the `linux-musl-meson` job consistently failing, possibly
because zlib-ng deflates differently: t8100.4 (git survey
(default)) was failing with a symptom like this:
TOTAL OBJECT SIZES BY TYPE
===============================================
Object Type | Count | Disk Size | Inflated Size
------------+-------+-----------+--------------
- Commits | 10 | 1523 | 2153
+ Commits | 10 | 1528 | 2153
Trees | 10 | 495 | 1706
Blobs | 10 | 191 | 101
- Tags | 4 | 510 | 528
+ Tags | 4 | 547 | 528
This means: the disk size is unlikely to be something we can verify robustly.
Since zlib-ng seems to increase the disk size of the tags from 510 to 547,
which is above their inflated size of 528, we cannot even assume that the
disk size is always smaller than the inflated size. We will most likely want to either skip verifying the
disk size altogether, or go for some kind of fuzzy matching, say, by
replacing `s/ 1[45][0-9][0-9] / ~1.5k /` and `s/ [45][0-9][0-9] / ~½k /`
or something like that.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
109 lines
2.9 KiB
Bash
Executable File
109 lines
2.9 KiB
Bash
Executable File
#!/bin/sh
|
|
|
|
# Description string for this test script; presumably read by test-lib.sh.
test_description='git survey'
|
|
|
|
# Set and export the initial-branch variable before test-lib.sh is sourced.
# NOTE(review): presumably makes "main" the initial branch of repositories
# created by the tests -- confirm against test-lib.sh.
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
|
|
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
|
|
|
|
# NOTE(review): the value 0 presumably marks this script as not passing
# under the leak sanitizer -- confirm which values test-lib.sh accepts.
TEST_PASSES_SANITIZE_LEAK=0
|
|
export TEST_PASSES_SANITIZE_LEAK
|
|
|
|
# Source the shared test library from the current directory; the test_*
# helpers used below are not defined in this file and presumably come
# from it.
. ./test-lib.sh
|
|
|
|
# Run `git survey -h` under `test_expect_code 129` (presumably asserting
# exit status 129 -- confirm in the test library), capture stdout in the
# file `usage`, and require that file to contain the text "EXPERIMENTAL!".
test_expect_success 'git survey -h shows experimental warning' '
|
|
test_expect_code 129 git survey -h >usage &&
|
|
grep "EXPERIMENTAL!" usage
|
|
'
|
|
|
|
# Build the fixture repository that the following tests inspect.
#
# After `test_commit_bulk 10` (presumably ten commits -- confirm in the
# test library), four annotated tags are created: `one` at HEAD~5, `two`
# at HEAD~3, `three` pointing at tag `two`, and `four` pointing at tag
# `three`.  The refs for `three` and `two` are then deleted, so only the
# refs `one` and `four` remain while the tag objects `two` and `three`
# stay reachable through `four`.  This matches the expectation further
# down in this file of 2 tag refs but 4 reachable tag objects.
test_expect_success 'create a semi-interesting repo' '
|
|
test_commit_bulk 10 &&
|
|
git tag -a -m one one HEAD~5 &&
|
|
git tag -a -m two two HEAD~3 &&
|
|
git tag -a -m three three two &&
|
|
git tag -a -m four four three &&
|
|
git update-ref -d refs/tags/three &&
|
|
git update-ref -d refs/tags/two
|
|
'
|
|
|
|
# Run `git survey --all-refs --progress` with GIT_PROGRESS_DELAY=0 set in
# its environment, sending stdout to `out` and stderr to `err`, then
# require `err` to contain the text "Preparing object walk".
test_expect_success 'git survey --progress' '
|
|
GIT_PROGRESS_DELAY=0 git survey --all-refs --progress >out 2>err &&
|
|
grep "Preparing object walk" err
|
|
'
|
|
|
|
# approximate_sizes [<file>...]
#
# Replace the numbers in the size columns of `git survey` output with
# coarse "~N.NkB" buckets, so that an expectation can be written without
# depending on exact byte counts.
#
# All arguments are handed to sed unchanged: the named files are read, or
# standard input when no file is given.  The edited text goes to stdout
# and the exit status is that of sed.
#
# A number is rewritten only when it is a whole space-delimited field: at
# least one space must precede it, and a space or the end of the line must
# follow it.  Requiring the leading space (" +" rather than " *") keeps a
# three-digit bucket from matching the tail of a longer number, e.g. the
# "153" inside "2153".  The run of leading spaces is collapsed into one.
#
# The expressions are applied in order, so a value covered by two buckets
# (560-569 is in both the 0.5kB and the 0.6kB pattern) lands in the
# earlier one.
approximate_sizes() {
	# very simplistic approximate rounding
	sed -E \
		-e "s/ +(1[0-9][0-9])( |$)/ ~0.1kB\2/g" \
		-e "s/ +(4[6-9][0-9]|5[0-6][0-9])( |$)/ ~0.5kB\2/g" \
		-e "s/ +(5[6-9][0-9]|6[0-6][0-9])( |$)/ ~0.6kB\2/g" \
		-e "s/ +1(4[89][0-9]|5[0-8][0-9])( |$)/ ~1.5kB\2/g" \
		-e "s/ +1(69[0-9]|7[0-9][0-9])( |$)/ ~1.7kB\2/g" \
		-e "s/ +1(79[0-9]|8[0-9][0-9])( |$)/ ~1.8kB\2/g" \
		-e "s/ +2(1[0-9][0-9]|20[0-1])( |$)/ ~2.1kB\2/g" \
		-e "s/ +2(3[0-9][0-9]|4[0-1][0-9])( |$)/ ~2.3kB\2/g" \
		-e "s/ +2(5[0-9][0-9]|6[0-1][0-9])( |$)/ ~2.5kB\2/g" \
		"$@"
}
|
|
|
|
# Check the default report.  Steps, in order:
#  1. Run `git survey --all-refs` with stdout in `out` and stderr in
#     `err`; `err` must have zero lines.
#  2. Register bucketed size strings, keyed by name and by hash algorithm
#     (sha1/sha256), via test_oid_cache so that test_oid can splice them
#     into the expectation.  NOTE(review): the lookup semantics live in
#     the test library -- confirm there.
#  3. Build `expect` from a here-doc fed through `tr , " "`, which turns
#     every comma in the here-doc into a space.
#  4. Bucket the sizes in `out` with approximate_sizes, keep only as many
#     leading lines as `expect` has, and compare the two with test_cmp.
#  5. Require `out` to contain a "TOP <type> BY <metric>" line for each
#     of DIRECTORIES/FILES combined with COUNT/DISK SIZE/INFLATED SIZE;
#     the body returns 1 on the first heading that is missing.
test_expect_success 'git survey (default)' '
|
|
git survey --all-refs >out 2>err &&
|
|
test_line_count = 0 err &&
|
|
|
|
test_oid_cache <<-EOF &&
|
|
commits_sizes sha1:~1.5kB | ~2.1kB
|
|
commits_sizes sha256:~1.8kB | ~2.5kB
|
|
trees_sizes sha1:~0.5kB | ~1.7kB
|
|
trees_sizes sha256:~0.6kB | ~2.3kB
|
|
blobs_sizes sha1:~0.1kB | ~0.1kB
|
|
blobs_sizes sha256:~0.1kB | ~0.1kB
|
|
tags_sizes sha1:~0.5kB | ~0.5kB
|
|
tags_sizes sha256:~0.5kB | ~0.6kB
|
|
EOF
|
|
|
|
tr , " " >expect <<-EOF &&
|
|
GIT SURVEY for "$(pwd)"
|
|
-----------------------------------------------------
|
|
|
|
REFERENCES SUMMARY
|
|
========================
|
|
, Ref Type | Count
|
|
-----------------+------
|
|
, Branches | 1
|
|
Remote refs | 0
|
|
Tags (all) | 2
|
|
Tags (annotated) | 2
|
|
|
|
REACHABLE OBJECT SUMMARY
|
|
========================
|
|
Object Type | Count
|
|
------------+------
|
|
Tags | 4
|
|
Commits | 10
|
|
Trees | 10
|
|
Blobs | 10
|
|
|
|
TOTAL OBJECT SIZES BY TYPE
|
|
===============================================
|
|
Object Type | Count | Disk Size | Inflated Size
|
|
------------+-------+-----------+--------------
|
|
Commits | 10 | $(test_oid commits_sizes)
|
|
Trees | 10 | $(test_oid trees_sizes)
|
|
Blobs | 10 | $(test_oid blobs_sizes)
|
|
Tags | 4 | $(test_oid tags_sizes)
|
|
EOF
|
|
|
|
approximate_sizes out >out-edited &&
|
|
lines=$(wc -l <expect) &&
|
|
head -n "$lines" <out-edited >out-trimmed &&
|
|
test_cmp expect out-trimmed &&
|
|
|
|
for type in "DIRECTORIES" "FILES"
|
|
do
|
|
for metric in "COUNT" "DISK SIZE" "INFLATED SIZE"
|
|
do
|
|
grep "TOP $type BY $metric" out || return 1
|
|
done || return 1
|
|
done
|
|
'
|
|
|
|
# End of the script; test_done is not defined in this file and presumably
# comes from test-lib.sh.
test_done
|