Files
git/odb/source-packed.c
Patrick Steinhardt 1b6e48c974 odb/source-packed: drop pointer to "files" parent source
Over the last commits we have turned the packfile store into a proper
object database source that can be used as a standalone backend. As
such, it is no longer necessary to have it coupled to the "files" parent
source.

Remove the pointer to the owning "files" source so that the "packed"
source can be used as a standalone entity.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2026-06-10 00:59:40 +09:00

765 lines
19 KiB
C

#include "git-compat-util.h"
#include "abspath.h"
#include "chdir-notify.h"
#include "dir.h"
#include "git-zlib.h"
#include "mergesort.h"
#include "midx.h"
#include "odb/source-packed.h"
#include "odb/streaming.h"
#include "packfile.h"
/*
 * Look up `oid` in the packed source and fill in `e` on success.
 *
 * The source is prepared first. The multi-pack index, if any, is consulted
 * before the individual packs, which are walked in list order. Returns 1
 * when the object was found and 0 otherwise.
 */
static int find_pack_entry(struct odb_source_packed *store,
			   const struct object_id *oid,
			   struct pack_entry *e)
{
	struct packfile_list_entry *cur;

	odb_source_packed_prepare(store);

	if (store->midx && fill_midx_entry(store->midx, oid, e))
		return 1;

	for (cur = store->packs.head; cur; cur = cur->next) {
		struct packed_git *pack = cur->pack;

		/*
		 * Packs flagged with `multi_pack_index` are skipped here;
		 * presumably the MIDX lookup above already covered them.
		 */
		if (pack->multi_pack_index)
			continue;
		if (!packfile_fill_entry(pack, oid, e))
			continue;

		/*
		 * Prepend the pack that produced the hit, unless the caller
		 * has asked for the list order to be left alone.
		 */
		if (!store->skip_mru_updates)
			packfile_list_prepend(&store->packs, pack);
		return 1;
	}

	return 0;
}
static int odb_source_packed_read_object_info(struct odb_source *source,
const struct object_id *oid,
struct object_info *oi,
enum object_info_flags flags)
{
struct odb_source_packed *packed = odb_source_packed_downcast(source);
struct pack_entry e;
int ret;
/*
* In case the first read didn't surface the object, we have to reload
* packfiles. This may cause us to discover new packfiles that have
* been added since the last time we have prepared the packfile store.
*/
if (flags & OBJECT_INFO_SECOND_READ)
odb_source_reprepare(source);
if (!find_pack_entry(packed, oid, &e))
return 1;
/*
* We know that the caller doesn't actually need the
* information below, so return early.
*/
if (!oi)
return 0;
ret = packed_object_info(e.p, e.offset, oi);
if (ret < 0) {
mark_bad_packed_object(e.p, oid);
return -1;
}
return 0;
}
static int odb_source_packed_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
const struct object_id *oid)
{
struct odb_source_packed *packed = odb_source_packed_downcast(source);
struct pack_entry e;
if (!find_pack_entry(packed, oid, &e))
return -1;
return packfile_read_object_stream(out, oid, e.p, e.offset);
}
/*
 * Context shared by the object iteration helpers in this file. It bundles
 * the user-supplied callback with the object info request so that the
 * per-pack and per-MIDX iterators can forward each object to the caller.
 */
struct odb_source_packed_for_each_object_wrapper_data {
/* Packed source that is being iterated. */
struct odb_source_packed *store;
/*
 * Template copied for every object before it is filled in. When NULL,
 * the callback is invoked without any object info.
 */
const struct object_info *request;
/* User callback invoked once per object. */
odb_for_each_object_cb cb;
/* Opaque pointer handed through to `cb` unchanged. */
void *cb_data;
};
/*
 * Per-object adapter used when iterating a single pack.
 *
 * `cb_data` is a `struct odb_source_packed_for_each_object_wrapper_data`.
 * If the caller requested object info, a private copy of the request is
 * filled in from the pack before the user callback runs. A failure to read
 * the info marks the object as bad and yields -1. Otherwise the return
 * value of the user callback is passed through.
 */
static int odb_source_packed_for_each_object_wrapper(const struct object_id *oid,
						     struct packed_git *pack,
						     uint32_t index_pos,
						     void *cb_data)
{
	struct odb_source_packed_for_each_object_wrapper_data *ctx = cb_data;
	struct object_info oi;
	off_t offset;

	/* No info requested: hand the bare object ID to the callback. */
	if (!ctx->request)
		return ctx->cb(oid, NULL, ctx->cb_data);

	offset = nth_packed_object_offset(pack, index_pos);
	oi = *ctx->request;

	if (packed_object_info_with_index_pos(pack, offset,
					      &index_pos, &oi) < 0) {
		mark_bad_packed_object(pack, oid);
		return -1;
	}

	return ctx->cb(oid, &oi, ctx->cb_data);
}
/*
 * Check whether two binary hashes agree on their first `len` hex digits.
 *
 * Each byte of `a` and `b` carries two hex digits. Full bytes are compared
 * while at least two digits remain; a trailing odd digit is compared via
 * the high nibble of the next byte only.
 *
 * The loop tests `len` before touching any byte so that short prefixes are
 * handled correctly: `len == 0` matches everything without reading from
 * either buffer, and `len == 1` compares just the high nibble of the first
 * byte. A do/while loop would compare a whole byte first and then wrap the
 * unsigned counter around on `len -= 2`, reading past the buffers.
 *
 * Returns 1 when the prefixes match and 0 otherwise.
 */
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
{
	while (len > 1) {
		if (*a != *b)
			return 0;
		a++;
		b++;
		len -= 2;
	}

	/* Odd number of hex digits: only the high nibble is significant. */
	if (len && ((*a ^ *b) & 0xf0))
		return 0;

	return 1;
}
/*
 * Invoke the callback in `data` for every object in the multi-pack index
 * chain starting at `m` whose object ID begins with `opts->prefix`.
 *
 * The prefix length is clamped to the hex size of the repository's hash
 * algorithm. Iteration stops at the first non-zero return value, whether
 * it comes from reading the object info or from the callback, and that
 * value is returned. Returns 0 when every matching object was visited.
 */
static int for_each_prefixed_object_in_midx(
	struct odb_source_packed *store,
	struct multi_pack_index *m,
	const struct odb_for_each_object_options *opts,
	struct odb_source_packed_for_each_object_wrapper_data *data)
{
	for (; m; m = m->base_midx) {
		uint32_t end, pos, first = 0;
		int len = opts->prefix_hex_len <= m->source->base.odb->repo->hash_algo->hexsz ?
			opts->prefix_hex_len : m->source->base.odb->repo->hash_algo->hexsz;

		if (!m->num_objects)
			continue;

		end = m->num_objects + m->num_objects_in_base;

		/*
		 * The search result itself is not needed. Only `first`
		 * matters: it is where matching object IDs would start.
		 */
		bsearch_one_midx(opts->prefix, m, &first);

		for (pos = first; pos < end; pos++) {
			struct object_id oid;
			const struct object_id *current =
				nth_midxed_object_oid(&oid, m, pos);
			struct object_info oi;
			struct object_info *oi_p = NULL;
			int ret;

			/* Entries are scanned in order; stop at the first mismatch. */
			if (!match_hash(len, opts->prefix->hash, current->hash))
				break;

			if (data->request) {
				oi = *data->request;
				ret = odb_source_read_object_info(&store->base, current,
								  &oi, 0);
				if (ret)
					return ret;
				oi_p = &oi;
			}

			ret = data->cb(&oid, oi_p, data->cb_data);
			if (ret)
				return ret;
		}
	}

	return 0;
}
/*
 * Invoke the callback in `data` for every object in pack `p` whose object
 * ID begins with `opts->prefix`.
 *
 * The prefix length is clamped to the hex size of the pack repository's
 * hash algorithm. Iteration stops at the first non-zero return value,
 * whether it comes from reading the object info or from the callback, and
 * that value is returned. Returns 0 when every matching object was visited.
 */
static int for_each_prefixed_object_in_pack(
	struct odb_source_packed *store,
	struct packed_git *p,
	const struct odb_for_each_object_options *opts,
	struct odb_source_packed_for_each_object_wrapper_data *data)
{
	uint32_t end, pos, first = 0;
	int len = opts->prefix_hex_len <= p->repo->hash_algo->hexsz ?
		opts->prefix_hex_len : p->repo->hash_algo->hexsz;

	end = p->num_objects;

	/*
	 * The search result itself is not needed. Only `first` matters: it
	 * is where object IDs matching the prefix would start.
	 */
	bsearch_pack(opts->prefix, p, &first);

	for (pos = first; pos < end; pos++) {
		struct object_id oid;
		struct object_info oi;
		struct object_info *oi_p = NULL;
		int ret;

		nth_packed_object_id(&oid, p, pos);

		/* Entries are scanned in order; stop at the first mismatch. */
		if (!match_hash(len, opts->prefix->hash, oid.hash))
			break;

		if (data->request) {
			oi = *data->request;
			ret = odb_source_read_object_info(&store->base, &oid, &oi, 0);
			if (ret)
				return ret;
			oi_p = &oi;
		}

		ret = data->cb(&oid, oi_p, data->cb_data);
		if (ret)
			return ret;
	}

	return 0;
}
static int odb_source_packed_for_each_prefixed_object(
struct odb_source_packed *store,
const struct odb_for_each_object_options *opts,
struct odb_source_packed_for_each_object_wrapper_data *data)
{
struct packfile_list_entry *e;
struct multi_pack_index *m;
bool pack_errors = false;
int ret;
if (opts->flags)
BUG("flags unsupported");
store->skip_mru_updates = true;
m = get_multi_pack_index(store);
if (m) {
ret = for_each_prefixed_object_in_midx(store, m, opts, data);
if (ret)
goto out;
}
for (e = packfile_store_get_packs(store); e; e = e->next) {
if (e->pack->multi_pack_index)
continue;
if (open_pack_index(e->pack)) {
pack_errors = true;
continue;
}
if (!e->pack->num_objects)
continue;
ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data);
if (ret)
goto out;
}
ret = 0;
out:
store->skip_mru_updates = false;
if (!ret && pack_errors)
ret = -1;
return ret;
}
/*
 * Iterate over the objects stored in the packed source.
 *
 * When `opts->prefix` is set, the walk is delegated to the prefix-based
 * iterator. Otherwise every pack is visited, subject to the filtering
 * flags in `opts->flags`, and `cb` is invoked for each object. If `request`
 * is non-NULL, a filled-in copy of it is passed to the callback.
 *
 * MRU updates of the pack list are disabled while iterating. Returns the
 * first non-zero value produced by the iteration. Otherwise returns -1 if
 * any pack index failed to open, and 0 on full success.
 */
static int odb_source_packed_for_each_object(struct odb_source *source,
					     const struct object_info *request,
					     odb_for_each_object_cb cb,
					     void *cb_data,
					     const struct odb_for_each_object_options *opts)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct odb_source_packed_for_each_object_wrapper_data data = {
		.store = packed,
		.request = request,
		.cb = cb,
		.cb_data = cb_data,
	};
	struct packfile_list_entry *entry;
	int saw_pack_error = 0;
	int ret = 0;

	if (opts->prefix)
		return odb_source_packed_for_each_prefixed_object(packed, opts, &data);

	packed->skip_mru_updates = true;

	for (entry = packfile_store_get_packs(packed); entry; entry = entry->next) {
		struct packed_git *pack = entry->pack;

		/* Apply the caller's pack filters. */
		if (!pack->pack_local &&
		    (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY))
			continue;
		if (!pack->pack_promisor &&
		    (opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
			continue;
		if (pack->pack_keep_in_core &&
		    (opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS))
			continue;
		if (pack->pack_keep &&
		    (opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
			continue;

		/* Remember the failure but keep going with other packs. */
		if (open_pack_index(pack)) {
			saw_pack_error = 1;
			continue;
		}

		ret = for_each_object_in_pack(pack,
					      odb_source_packed_for_each_object_wrapper,
					      &data, opts->flags);
		if (ret)
			break;
	}

	packed->skip_mru_updates = false;

	if (!ret && saw_pack_error)
		ret = -1;

	return ret;
}
/*
 * Count the objects contained in the packed source.
 *
 * The total is the object count of the multi-pack index (including its
 * base) plus the object counts of all packs that are not flagged with
 * `multi_pack_index`. On success the total is stored in `*out` and 0 is
 * returned. If a pack index cannot be opened, -1 is returned and `*out` is
 * left untouched.
 */
static int odb_source_packed_count_objects(struct odb_source *source,
					   enum odb_count_objects_flags flags UNUSED,
					   unsigned long *out)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct multi_pack_index *midx = get_multi_pack_index(packed);
	struct packfile_list_entry *entry;
	unsigned long total = 0;

	if (midx)
		total += midx->num_objects + midx->num_objects_in_base;

	for (entry = packfile_store_get_packs(packed); entry; entry = entry->next) {
		struct packed_git *pack = entry->pack;

		if (pack->multi_pack_index)
			continue;
		if (open_pack_index(pack))
			return -1;

		total += pack->num_objects;
	}

	*out = total;
	return 0;
}
/*
 * Raise `*out` so that an abbreviation of that length distinguishes `a`
 * from `b`.
 *
 * If the two object IDs share a common hex prefix of at least `*out`
 * digits, `*out` becomes one more than the shared length. Nothing happens
 * when the shared length equals the full hex size of `a`'s hash algorithm.
 * Always returns 0.
 */
static int extend_abbrev_len(const struct object_id *a,
			     const struct object_id *b,
			     unsigned *out)
{
	unsigned shared = oid_common_prefix_hexlen(a, b);

	if (shared == hash_algos[a->algo].hexsz)
		return 0;
	if (shared < *out)
		return 0;

	*out = shared + 1;
	return 0;
}
static void find_abbrev_len_for_midx(struct multi_pack_index *m,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
unsigned len = min_len;
for (; m; m = m->base_midx) {
int match = 0;
uint32_t num, first = 0;
struct object_id found_oid;
if (!m->num_objects)
continue;
num = m->num_objects + m->num_objects_in_base;
match = bsearch_one_midx(oid, m, &first);
/*
* first is now the position in the packfile where we
* would insert the object ID if it does not exist (or the
* position of the object ID if it does exist). Hence, we
* consider a maximum of two objects nearby for the
* abbreviation length.
*/
if (!match) {
if (nth_midxed_object_oid(&found_oid, m, first))
extend_abbrev_len(&found_oid, oid, &len);
} else if (first < num - 1) {
if (nth_midxed_object_oid(&found_oid, m, first + 1))
extend_abbrev_len(&found_oid, oid, &len);
}
if (first > 0) {
if (nth_midxed_object_oid(&found_oid, m, first - 1))
extend_abbrev_len(&found_oid, oid, &len);
}
}
*out = len;
}
static void find_abbrev_len_for_pack(struct packed_git *p,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
int match;
uint32_t num, first = 0;
struct object_id found_oid;
unsigned len = min_len;
num = p->num_objects;
match = bsearch_pack(oid, p, &first);
/*
* first is now the position in the packfile where we would insert
* the object ID if it does not exist (or the position of mad->hash if
* it does exist). Hence, we consider a maximum of two objects
* nearby for the abbreviation length.
*/
if (!match) {
if (!nth_packed_object_id(&found_oid, p, first))
extend_abbrev_len(&found_oid, oid, &len);
} else if (first < num - 1) {
if (!nth_packed_object_id(&found_oid, p, first + 1))
extend_abbrev_len(&found_oid, oid, &len);
}
if (first > 0) {
if (!nth_packed_object_id(&found_oid, p, first - 1))
extend_abbrev_len(&found_oid, oid, &len);
}
*out = len;
}
static int odb_source_packed_find_abbrev_len(struct odb_source *source,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
struct odb_source_packed *packed = odb_source_packed_downcast(source);
struct packfile_list_entry *e;
struct multi_pack_index *m;
m = get_multi_pack_index(packed);
if (m)
find_abbrev_len_for_midx(m, oid, min_len, &min_len);
for (e = packfile_store_get_packs(packed); e; e = e->next) {
if (e->pack->multi_pack_index)
continue;
if (open_pack_index(e->pack) || !e->pack->num_objects)
continue;
find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len);
}
*out = min_len;
return 0;
}
static int odb_source_packed_freshen_object(struct odb_source *source,
const struct object_id *oid)
{
struct odb_source_packed *packed = odb_source_packed_downcast(source);
struct pack_entry e;
if (!find_pack_entry(packed, oid, &e))
return 0;
if (e.p->is_cruft)
return 0;
if (e.p->freshened)
return 1;
if (utime(e.p->pack_name, NULL))
return 0;
e.p->freshened = 1;
return 1;
}
/*
 * Writing objects is not supported by the packed backend. All arguments
 * are ignored and the result of error() is returned.
 */
static int odb_source_packed_write_object(struct odb_source *source UNUSED,
					  const void *buf UNUSED,
					  unsigned long len UNUSED,
					  enum object_type type UNUSED,
					  struct object_id *oid UNUSED,
					  struct object_id *compat_oid UNUSED,
					  unsigned flags UNUSED)
{
	return error("packed backend cannot write objects");
}
/*
 * Writing object streams is not supported by the packed backend. All
 * arguments are ignored and the result of error() is returned.
 */
static int odb_source_packed_write_object_stream(struct odb_source *source UNUSED,
						 struct odb_write_stream *stream UNUSED,
						 size_t len UNUSED,
						 struct object_id *oid UNUSED)
{
	return error("packed backend cannot write object streams");
}
/*
 * Transactions are not supported by the packed backend. All arguments are
 * ignored and the result of error() is returned.
 */
static int odb_source_packed_begin_transaction(struct odb_source *source UNUSED,
					       struct odb_transaction **out UNUSED)
{
	return error("packed backend cannot begin transactions");
}
/*
 * The packed backend reports no alternates: `out` is left untouched and 0
 * is returned.
 */
static int odb_source_packed_read_alternates(struct odb_source *source UNUSED,
					     struct strvec *out UNUSED)
{
	return 0;
}
/*
 * Writing alternates is not supported by the packed backend. All arguments
 * are ignored and the result of error() is returned.
 */
static int odb_source_packed_write_alternate(struct odb_source *source UNUSED,
					     const char *alternate UNUSED)
{
	return error("packed backend cannot write alternates");
}
/*
 * Optional hook for reporting unexpected files found while scanning a pack
 * directory. It receives a bitmask describing what was seen for the file
 * and the path of the file. While the pointer is NULL, the garbage
 * reporting paths in this file return early without reporting anything.
 */
void (*report_garbage)(unsigned seen_bits, const char *path);
/*
 * Report the entries `first` (inclusive) through `last` (exclusive) of
 * `list` as garbage, tagged with `seen_bits`.
 *
 * Nothing is reported when both the pack and the index bit are set, and
 * no other bit is. The caller must have checked that `report_garbage` is
 * set.
 */
static void report_helper(const struct string_list *list,
			  int seen_bits, int first, int last)
{
	int idx;

	if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
		return;

	for (idx = first; idx < last; idx++)
		report_garbage(seen_bits, list->items[idx].string);
}
/*
 * Report garbage among the pack-related files collected in `list`.
 *
 * The list is sorted so that files sharing the same name up to and
 * including the last '.' end up next to each other. For each such group,
 * the function records whether a "pack" and an "idx" file were seen and
 * hands the group to report_helper(). Paths without any '.' are reported
 * directly as garbage. Does nothing when no `report_garbage` hook is set.
 */
static void report_pack_garbage(struct string_list *list)
{
	int prefix_len = -1, group_start = 0, seen_bits = 0;
	size_t i;

	if (!report_garbage)
		return;

	string_list_sort(list);

	for (i = 0; i < list->nr; i++) {
		const char *path = list->items[i].string;
		const char *ext;

		/* A differing prefix closes the current group. */
		if (prefix_len != -1 &&
		    strncmp(path, list->items[group_start].string, prefix_len)) {
			report_helper(list, seen_bits, group_start, i);
			prefix_len = -1;
			seen_bits = 0;
		}

		/* Open a new group keyed on everything up to the last dot. */
		if (prefix_len == -1) {
			const char *dot = strrchr(path, '.');

			if (!dot) {
				report_garbage(PACKDIR_FILE_GARBAGE, path);
				continue;
			}
			prefix_len = dot - path + 1;
			group_start = i;
		}

		/*
		 * NOTE(review): the literals below presumably correspond to
		 * PACKDIR_FILE_PACK and PACKDIR_FILE_IDX as checked by
		 * report_helper() -- confirm against their definitions.
		 */
		ext = path + prefix_len;
		if (!strcmp(ext, "pack"))
			seen_bits |= 1;
		else if (!strcmp(ext, "idx"))
			seen_bits |= 2;
	}

	/* Flush the final group. */
	report_helper(list, seen_bits, group_start, list->nr);
}
/*
 * Callback payload used while scanning the pack directory of a packed
 * source.
 */
struct prepare_pack_data {
/* Packed source that discovered packs are loaded into. */
struct odb_source_packed *source;
/* Collects pack-related file names for later garbage reporting. */
struct string_list *garbage;
};
static void prepare_pack(const char *full_name, size_t full_name_len,
const char *file_name, void *_data)
{
struct prepare_pack_data *data = (struct prepare_pack_data *)_data;
size_t base_len = full_name_len;
if (strip_suffix_mem(full_name, &base_len, ".idx") &&
!(data->source->midx &&
midx_contains_pack(data->source->midx, file_name))) {
char *trimmed_path = xstrndup(full_name, full_name_len);
packfile_store_load_pack(data->source,
trimmed_path, data->source->base.local);
free(trimmed_path);
}
if (!report_garbage)
return;
if (!strcmp(file_name, "multi-pack-index") ||
!strcmp(file_name, "multi-pack-index.d"))
return;
if (starts_with(file_name, "multi-pack-index") &&
(ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev")))
return;
if (ends_with(file_name, ".idx") ||
ends_with(file_name, ".rev") ||
ends_with(file_name, ".pack") ||
ends_with(file_name, ".bitmap") ||
ends_with(file_name, ".keep") ||
ends_with(file_name, ".promisor") ||
ends_with(file_name, ".mtimes"))
string_list_append(data->garbage, full_name);
else
report_garbage(PACKDIR_FILE_GARBAGE, full_name);
}
/*
 * Scan the pack directory of `source`. prepare_pack() runs for every file
 * found, and the pack-related files it collected are then checked for
 * garbage. The temporary list is released before returning.
 */
static void prepare_packed_git_one(struct odb_source_packed *source)
{
	struct string_list garbage = STRING_LIST_INIT_DUP;
	struct prepare_pack_data data = {
		.source = source,
		.garbage = &garbage,
	};

	for_each_file_in_pack_dir(source->base.path, prepare_pack, &data);

	report_pack_garbage(&garbage);
	string_list_clear(&garbage, 0);
}
/*
 * Instantiate the static list-sorting helper sort_packs() for lists of
 * `struct packfile_list_entry` that are chained through their `next`
 * member. It is used by odb_source_packed_prepare() to order the packs.
 */
DEFINE_LIST_SORT(static, sort_packs, struct packfile_list_entry, next);
static int sort_pack(const struct packfile_list_entry *a,
const struct packfile_list_entry *b)
{
int st;
/*
* Local packs tend to contain objects specific to our
* variant of the project than remote ones. In addition,
* remote ones could be on a network mounted filesystem.
* Favor local ones for these reasons.
*/
st = a->pack->pack_local - b->pack->pack_local;
if (st)
return -st;
/*
* Younger packs tend to contain more recent objects,
* and more recent objects tend to get accessed more
* often.
*/
if (a->pack->mtime < b->pack->mtime)
return 1;
else if (a->pack->mtime == b->pack->mtime)
return 0;
return -1;
}
/*
 * Prepare the packed source for use, unless that has already happened.
 *
 * This sets up the multi-pack index, scans the pack directory for packs,
 * sorts the list of packs and fixes up the list's tail pointer. The source
 * is flagged as initialized afterwards, so repeated calls are no-ops.
 */
void odb_source_packed_prepare(struct odb_source_packed *source)
{
	struct packfile_list_entry *last;

	if (source->initialized)
		return;

	prepare_multi_pack_index_one(source);
	prepare_packed_git_one(source);

	sort_packs(&source->packs.head, sort_pack);

	/* Sorting only maintains the head; locate the new tail by walking. */
	last = source->packs.head;
	if (last) {
		while (last->next)
			last = last->next;
		source->packs.tail = last;
	}

	source->initialized = true;
}
/*
 * Force the packed source to be prepared again by clearing its
 * `initialized` flag and re-running odb_source_packed_prepare().
 */
static void odb_source_packed_reprepare(struct odb_source *source)
{
	struct odb_source_packed *self = odb_source_packed_downcast(source);

	self->initialized = false;
	odb_source_packed_prepare(self);
}
/*
 * chdir-notify callback. It replaces the path of the packed source passed
 * in `cb_data` with the result of reparent_relative_path() for the move
 * from `old_cwd` to `new_cwd`, and releases the previous path.
 */
static void odb_source_packed_reparent(const char *name UNUSED,
				       const char *old_cwd,
				       const char *new_cwd,
				       void *cb_data)
{
	struct odb_source_packed *self = cb_data;
	char *old_path = self->base.path;

	self->base.path = reparent_relative_path(old_cwd, new_cwd, old_path);
	free(old_path);
}
/*
 * Close every pack of the packed source, then close and forget its
 * multi-pack index. Trying to close a pack that is marked `do_not_close`
 * is a programming error and triggers a BUG().
 */
static void odb_source_packed_close(struct odb_source *source)
{
	struct odb_source_packed *self = odb_source_packed_downcast(source);
	struct packfile_list_entry *entry = self->packs.head;

	while (entry) {
		if (entry->pack->do_not_close)
			BUG("want to close pack marked 'do-not-close'");
		close_pack(entry->pack);
		entry = entry->next;
	}

	if (self->midx)
		close_midx(self->midx);
	self->midx = NULL;
}
/*
 * Release all resources owned by the packed source and free the source
 * itself. The source is removed from the chdir notification list first.
 */
static void odb_source_packed_free(struct odb_source *source)
{
	struct odb_source_packed *self = odb_source_packed_downcast(source);
	struct packfile_list_entry *entry;

	chdir_notify_unregister(NULL, odb_source_packed_reparent, self);

	/* Free the packs first, then the list entries that pointed at them. */
	for (entry = self->packs.head; entry; entry = entry->next)
		free(entry->pack);
	packfile_list_clear(&self->packs);

	strmap_clear(&self->packs_by_path, 0);
	odb_source_release(&self->base);
	free(self);
}
struct odb_source_packed *odb_source_packed_new(struct object_database *odb,
const char *path,
bool local)
{
struct odb_source_packed *packed;
CALLOC_ARRAY(packed, 1);
odb_source_init(&packed->base, odb, ODB_SOURCE_PACKED, path, local);
strmap_init(&packed->packs_by_path);
packed->base.free = odb_source_packed_free;
packed->base.close = odb_source_packed_close;
packed->base.reprepare = odb_source_packed_reprepare;
packed->base.read_object_info = odb_source_packed_read_object_info;
packed->base.read_object_stream = odb_source_packed_read_object_stream;
packed->base.for_each_object = odb_source_packed_for_each_object;
packed->base.count_objects = odb_source_packed_count_objects;
packed->base.find_abbrev_len = odb_source_packed_find_abbrev_len;
packed->base.freshen_object = odb_source_packed_freshen_object;
packed->base.write_object = odb_source_packed_write_object;
packed->base.write_object_stream = odb_source_packed_write_object_stream;
packed->base.begin_transaction = odb_source_packed_begin_transaction;
packed->base.read_alternates = odb_source_packed_read_alternates;
packed->base.write_alternate = odb_source_packed_write_alternate;
if (!is_absolute_path(path))
chdir_notify_register(NULL, odb_source_packed_reparent, packed);
return packed;
}