Merge branch 'lp/repack-propagate-promisor-debugging-info' into seen

When fetching objects into a lazily cloned repository, .promisor
files are created with information meant to help debugging.  "git
repack" has been taught to carry this information forward to
packfiles that are newly created.

Retracted.
cf. <agx_GPfBKpkSc3Gx@lorenzo-VM>

* lp/repack-propagate-promisor-debugging-info:
  repack-promisor: add missing headers
  t7703: test for promisor file content after geometric repack
  t7700: test for promisor file content after repack
  repack-promisor: preserve content of promisor files after repack
  repack-promisor add helper to fill promisor file after repack
  pack-write: add explanation to promisor file content
This commit is contained in:
Junio C Hamano
2026-06-04 08:14:06 +09:00
5 changed files with 280 additions and 21 deletions

View File

@@ -45,8 +45,8 @@ other objects in that pack they already have locally.
+
Promisor packfiles are repacked separately: if there are packfiles that
have an associated ".promisor" file, these packfiles will be repacked
into another separate pack, and an empty ".promisor" file corresponding
to the new separate pack will be written.
into another separate pack, and a ".promisor" file corresponding to the
new separate pack will be written (with arbitrary contents).
-A::
Same as `-a`, unless `-d` is used. Then any unreachable

View File

@@ -603,6 +603,15 @@ void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_
int i, err;
FILE *output = xfopen(promisor_name, "w");
/*
* Write in the .promisor file the ref names and associated hashes,
* obtained by fetch-pack, at the point of generation of the
* corresponding packfile. These pieces of info are only used to make
* it easier to debug issues with partial clones, as we can identify
* what refs (and their associated hashes) were fetched at the time
* the packfile was downloaded, and if necessary, compare those hashes
* against what the promisor remote reports now.
*/
for (i = 0; i < nr_sought; i++)
fprintf(output, "%s %s\n", oid_to_hex(&sought[i]->old_oid),
sought[i]->name);

View File

@@ -1,11 +1,18 @@
#include "git-compat-util.h"
#include "repack.h"
#include "hash.h"
#include "hex.h"
#include "odb.h"
#include "pack.h"
#include "packfile.h"
#include "path.h"
#include "refs.h"
#include "repository.h"
#include "run-command.h"
#include "strbuf.h"
#include "string-list.h"
#include "strmap.h"
#include "strvec.h"
struct write_oid_context {
struct child_process *cmd;
@@ -34,10 +41,159 @@ static int write_oid(const struct object_id *oid,
return 0;
}
/*
* Go through all .promisor files contained in repo (excluding those whose name
* appears in not_repacked_basenames, which acts as a ignorelist), and copies
* their content inside the destination file "<packtmp>-<dest_hex>.promisor".
* Each line of a never repacked .promisor file is: "<oid> <ref>" (as described
* in the write_promisor_file() function).
* After a repack, the copied lines will be: "<oid> <ref> <time>", where <time>
* is the time (in Unix time) at which the .promisor file was last modified.
* Only the lines whose <oid> is present inside "<packtmp>-<dest_hex>.idx" will
* be copied.
* The contents of all .promisor files are assumed to be correctly formed.
*/
static void write_promisor_file_after_repack(struct repository *repo,
const char *dest_hex,
const char *packtmp,
struct strset *not_repacked_basenames)
{
char *dest_promisor_name;
char *dest_idx_name;
FILE *dest;
struct object_id dest_oid;
struct packed_git *dest_pack, *p;
struct strbuf source_promisor_name = STRBUF_INIT;
struct strset seen_lines = STRSET_INIT;
struct strbuf line = STRBUF_INIT;
int err;
/* First of all, let's create and open the .promisor dest file */
dest_promisor_name = mkpathdup("%s-%s.promisor", packtmp, dest_hex);
dest = xfopen(dest_promisor_name, "w");
/*
* Now let's retrieve the destination pack.
* We use parse_pack_index() because dest_hex/packtmp point to the packfile
* that "pack-objects" just created, which is about to become part of this
* repository, but has not yet been finalized.
* If we are here, we know that "pack-objects" did not fail, so
* parse_pack_index() being loose in validation does not pose a problem.
* If an error happens, we simply leave the ".promisor" file empty.
*/
if (get_oid_hex_algop(dest_hex, &dest_oid, repo->hash_algo)) {
warning(_("Promisor file left empty: '%s' not a hash"), dest_hex);
if (fclose(dest))
die(_("Could not close '%s' promisor file"), dest_promisor_name);
free(dest_promisor_name);
return;
}
dest_idx_name = mkpathdup("%s-%s.idx", packtmp, dest_hex);
dest_pack = parse_pack_index(repo, dest_oid.hash, dest_idx_name);
if (!dest_pack) {
warning(_("Promisor file left empty: couldn't open packfile '%s'"), dest_idx_name);
if (fclose(dest))
die(_("Could not close '%s' promisor file"), dest_promisor_name);
free(dest_promisor_name);
free(dest_idx_name);
return;
}
repo_for_each_pack(repo, p) {
FILE *source;
struct stat source_stat;
if (!p->pack_promisor)
continue;
if (not_repacked_basenames &&
strset_contains(not_repacked_basenames, pack_basename(p)))
continue;
strbuf_reset(&source_promisor_name);
strbuf_addstr(&source_promisor_name, p->pack_name);
strbuf_strip_suffix(&source_promisor_name, ".pack");
strbuf_addstr(&source_promisor_name, ".promisor");
if (stat(source_promisor_name.buf, &source_stat))
die(_("File not found: %s"), source_promisor_name.buf);
source = xfopen(source_promisor_name.buf, "r");
while (strbuf_getline(&line, source) != EOF) {
struct string_list line_sections = STRING_LIST_INIT_DUP;
struct object_id oid;
/* Split line into <oid>, <ref> and <time> (if <time> exists).
* Check that it was actually split into 2 or 3 parts. If it was
* not, then it is malformed, so skip it.
*/
string_list_split(&line_sections, line.buf, " ", 3);
if (line_sections.nr != 2 && line_sections.nr != 3) {
string_list_clear(&line_sections, 0);
continue;
}
/* Skip the lines where <oid> is not a sane hexadecimal string */
if (get_oid_hex_algop(line_sections.items[0].string,
&oid, repo->hash_algo)) {
string_list_clear(&line_sections, 0);
continue;
}
/* Ignore the lines where <oid> doesn't appear in the dest_pack */
if (!find_pack_entry_one(&oid, dest_pack)) {
string_list_clear(&line_sections, 0);
continue;
}
/*
* Skip the lines where <ref> does not have the
* correct format for a refname.
*/
printf("%s\n", line_sections.items[1].string);
if (check_refname_format(line_sections.items[1].string,
REFNAME_ALLOW_ONELEVEL)) {
string_list_clear(&line_sections, 0);
continue;
}
/* If <time> doesn't exist, retrieve it and add it to line */
if (line_sections.nr != 3)
strbuf_addf(&line, " %" PRItime,
(timestamp_t)source_stat.st_mtime);
/* If the finalized line is new, append it to dest */
if (strset_add(&seen_lines, line.buf))
fprintf(dest, "%s\n", line.buf);
string_list_clear(&line_sections, 0);
}
err = ferror(source);
err |= fclose(source);
if (err)
die(_("Could not read '%s' promisor file"), source_promisor_name.buf);
}
err = ferror(dest);
err |= fclose(dest);
if (err)
die(_("Could not write '%s' promisor file"), dest_promisor_name);
close_pack_index(dest_pack);
free(dest_pack);
free(dest_promisor_name);
free(dest_idx_name);
strbuf_release(&source_promisor_name);
strbuf_release(&line);
strset_clear(&seen_lines);
}
static void finish_repacking_promisor_objects(struct repository *repo,
struct child_process *cmd,
struct string_list *names,
const char *packtmp)
const char *packtmp,
struct strset *not_repacked_basenames)
{
struct strbuf line = STRBUF_INIT;
FILE *out;
@@ -47,7 +203,6 @@ static void finish_repacking_promisor_objects(struct repository *repo,
out = xfdopen(cmd->out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
struct string_list_item *item;
char *promisor_name;
if (line.len != repo->hash_algo->hexsz)
die(_("repack: Expecting full hex object ID lines only from pack-objects."));
@@ -55,22 +210,16 @@ static void finish_repacking_promisor_objects(struct repository *repo,
/*
* pack-objects creates the .pack and .idx files, but not the
* .promisor file. Create the .promisor file, which is empty.
*
* NEEDSWORK: fetch-pack sometimes generates non-empty
* .promisor files containing the ref names and associated
* hashes at the point of generation of the corresponding
* packfile, but this would not preserve their contents. Maybe
* concatenate the contents of all .promisor files instead of
* just creating a new empty file.
* ".promisor" file. To create the "".promisor" file, we don't use the
* helper function write_promisor_file(), but instead we use the
* specific function write_promisor_file_after_repack(), which creates
* the file and appropriately fills it with the content of the
* ".promisor" files used for the repack.
*/
promisor_name = mkpathdup("%s-%s.promisor", packtmp,
line.buf);
write_promisor_file(promisor_name, NULL, 0);
write_promisor_file_after_repack(repo, line.buf, packtmp,
not_repacked_basenames);
item->util = generated_pack_populate(item->string, packtmp);
free(promisor_name);
}
fclose(out);
@@ -107,7 +256,7 @@ void repack_promisor_objects(struct repository *repo,
return;
}
finish_repacking_promisor_objects(repo, &cmd, names, packtmp);
finish_repacking_promisor_objects(repo, &cmd, names, packtmp, NULL);
}
void pack_geometry_repack_promisors(struct repository *repo,
@@ -118,6 +267,7 @@ void pack_geometry_repack_promisors(struct repository *repo,
{
struct child_process cmd = CHILD_PROCESS_INIT;
FILE *in;
struct strset not_repacked_basenames = STRSET_INIT;
if (!geometry->promisor_split)
return;
@@ -131,9 +281,15 @@ void pack_geometry_repack_promisors(struct repository *repo,
in = xfdopen(cmd.in, "w");
for (size_t i = 0; i < geometry->promisor_split; i++)
fprintf(in, "%s\n", pack_basename(geometry->promisor_pack[i]));
for (size_t i = geometry->promisor_split; i < geometry->promisor_pack_nr; i++)
fprintf(in, "^%s\n", pack_basename(geometry->promisor_pack[i]));
for (size_t i = geometry->promisor_split; i < geometry->promisor_pack_nr; i++) {
const char *name = pack_basename(geometry->promisor_pack[i]);
fprintf(in, "^%s\n", name);
strset_add(&not_repacked_basenames, name);
}
fclose(in);
finish_repacking_promisor_objects(repo, &cmd, names, packtmp);
finish_repacking_promisor_objects(repo, &cmd, names, packtmp,
strset_get_size(&not_repacked_basenames) ? &not_repacked_basenames : NULL);
strset_clear(&not_repacked_basenames);
}

View File

@@ -904,4 +904,65 @@ test_expect_success 'pending objects are repacked appropriately' '
)
'
test_expect_success 'check one .promisor file content after repack' '
test_when_finished rm -rf prom_test prom_before_repack &&
git init prom_test &&
path=prom_test/.git/objects/pack &&
(
# Create 1 pack
test_commit_bulk -C prom_test 1 &&
# Simulate .promisor file by creating it manually
prom=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/") &&
oid=$(git -C prom_test rev-parse HEAD) &&
echo "$oid ref" >"$prom" &&
# Repack, and check if correct
git -C prom_test repack -a -d -f &&
prom=$(find $path -name "*.promisor") &&
# $prom should contain "$oid ref <time>"
test_grep "$oid ref " "$prom" &&
# Save the current .promisor content, repack, and check if correct
cp "$prom" prom_before_repack &&
git -C prom_test repack -a -d -f &&
prom=$(find $path -name "*.promisor") &&
# $prom should be exactly the same as prom_before_repack
test_cmp prom_before_repack "$prom"
)
'
test_expect_success 'check multiple .promisor file content after repack' '
test_when_finished rm -rf prom_test prom_before_repack &&
git init prom_test &&
path=prom_test/.git/objects/pack &&
(
# Create 2 packs and simulate .promisor files by creating them manually
test_commit_bulk -C prom_test 1 &&
prom=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/") &&
oid1=$(git -C prom_test rev-parse HEAD) &&
echo "$oid1 ref1" >"$prom" &&
test_commit_bulk -C prom_test 1 &&
prom=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/; \|$prom|d") &&
oid2=$(git -C prom_test rev-parse HEAD) &&
echo "$oid2 ref2" >"$prom" &&
# Repack, and check if correct
git -C prom_test repack -a -d -f &&
prom=$(find $path -name "*.promisor") &&
# $prom should contain "$oid1 ref1 <time>" & "$oid2 ref2 <time>"
test_grep "$oid1 ref1 " "$prom" &&
test_grep "$oid2 ref2 " "$prom" &&
# Save the current .promisor content, repack, and check if correct
cp "$prom" prom_before_repack &&
git -C prom_test repack -a -d -f &&
prom=$(find $path -name "*.promisor") &&
# $prom should be exactly the same as prom_before_repack
test_cmp prom_before_repack "$prom"
)
'
test_done

View File

@@ -541,4 +541,37 @@ test_expect_success 'geometric repack works with promisor packs' '
)
'
test_expect_success 'check .promisor file content after geometric repack' '
test_when_finished rm -rf prom_test &&
git init prom_test &&
path=prom_test/.git/objects/pack &&
(
# Create 2 packs with 3 objs each, and manually create .promisor files
test_commit_bulk -C prom_test --start=1 1 && # 3 objects
prom1=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/") &&
oid1=$(git -C prom_test rev-parse HEAD) &&
echo "$oid1 ref1" >"$prom1" &&
test_commit_bulk -C prom_test --start=2 1 && # 3 objects
prom2=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/; \|$prom1|d") &&
oid2=$(git -C prom_test rev-parse HEAD) &&
echo "$oid2 ref2" >"$prom2" &&
# Create 1 pack with 12 objs, and manually create .promisor file
test_commit_bulk -C prom_test --start=3 4 && # 12 objects
prom3=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/; \|$prom1|d; \|$prom2|d") &&
oid3=$(git -C prom_test rev-parse HEAD) &&
echo "$oid3 ref3" >"$prom3" &&
# Geometric repack, and check if correct
git -C prom_test repack --geometric 2 -d &&
prom=$(find $path -name "*.pack" | sed "s/.pack$/.promisor/; \|$prom3|d") &&
# $prom should have repacked only the first 2 small packs, so it should only
# contain the following: "$oid1 ref1 <time>" & "$oid2 ref2 <time>"
test_grep "$oid1 ref1 " "$prom" &&
test_grep "$oid2 ref2 " "$prom" &&
test_grep ! "$oid3 ref3" "$prom"
)
'
test_done