odb/source-loose: wire up for_each_object() callback

Move `odb_source_loose_for_each_object()` and its associated helpers
from "object-file.c" into "odb/source-loose.c" and wire it up as the
`for_each_object()` callback of the loose source.

Again, as in the preceding commit, we are forced to expose a couple of
functions from "object-file.c" that are now used by both subsystems.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Patrick Steinhardt
2026-06-01 10:20:31 +02:00
committed by Junio C Hamano
parent 727a935a71
commit e4f1d9ba57
5 changed files with 297 additions and 305 deletions

View File

@@ -862,8 +862,9 @@ static void batch_each_object(struct batch_options *opt,
*/
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next) {
int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
&payload, &opts);
struct odb_source_files *files = odb_source_files_downcast(source);
int ret = odb_source_for_each_object(&files->loose->base, NULL, batch_one_object_oi,
&payload, &opts);
if (ret)
break;
}

View File

@@ -22,7 +22,6 @@
#include "odb.h"
#include "odb/streaming.h"
#include "odb/transaction.h"
#include "oidtree.h"
#include "pack.h"
#include "packfile.h"
#include "path.h"
@@ -31,12 +30,6 @@
#include "tempfile.h"
#include "tmp-objdir.h"
/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
static struct oidtree *odb_source_loose_cache(struct odb_source *source,
const struct object_id *oid);
static int get_conv_flags(unsigned flags)
{
if (flags & INDEX_RENORMALIZE)
@@ -164,12 +157,6 @@ int stream_object_signature(struct repository *r,
return !oideq(oid, &real_oid) ? -1 : 0;
}
/*
 * Report (as 0 or 1) whether `oid` is present in the loose object cache of
 * the source that owns `loose`.
 */
static int quick_has_loose(struct odb_source_loose *loose,
			   const struct object_id *oid)
{
	struct oidtree *cache = odb_source_loose_cache(&loose->files->base, oid);

	return oidtree_contains(cache, oid) ? 1 : 0;
}
/*
* Map and close the given loose object fd. The path argument is used for
* error reporting.
@@ -227,9 +214,9 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
return ULHR_TOO_LONG;
}
static void *unpack_loose_rest(git_zstream *stream,
void *buffer, unsigned long size,
const struct object_id *oid)
void *unpack_loose_rest(git_zstream *stream,
void *buffer, unsigned long size,
const struct object_id *oid)
{
size_t bytes = strlen(buffer) + 1, n;
unsigned char *buf = xmallocz(size);
@@ -343,149 +330,6 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
return 0;
}
/*
 * Read information about the loose object `oid` stored at `path` and fill in
 * whatever the caller requested through `oi`. `oi` may be NULL, in which case
 * only the existence of the object is checked.
 *
 * Returns 0 on success and a non-zero error value otherwise. When
 * OBJECT_INFO_DIE_IF_CORRUPT is set in `flags`, a failure to unpack or parse
 * the object dies instead of returning.
 *
 * On success with a non-NULL `oi`, `oi->whence` is set to OI_LOOSE. A
 * requested `delta_base_oid` is always cleared.
 */
int read_object_info_from_path(struct odb_source_loose *loose,
			       const char *path,
			       const struct object_id *oid,
			       struct object_info *oi,
			       enum object_info_flags flags)
{
	int ret;
	int fd;
	unsigned long mapsize;
	void *map = NULL;
	git_zstream stream, *stream_to_end = NULL;
	char hdr[MAX_HEADER_LEN];
	unsigned long size_scratch;
	enum object_type type_scratch;
	struct stat st;

	/*
	 * If we don't care about type or size, then we don't
	 * need to look inside the object at all. Note that we
	 * do not optimize out the stat call, even if the
	 * caller doesn't care about the disk-size, since our
	 * return value implicitly indicates whether the
	 * object even exists.
	 */
	if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
		/*
		 * With OBJECT_INFO_QUICK and no stat-derived field requested,
		 * the loose object cache answers the existence question.
		 */
		if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
			ret = quick_has_loose(loose, oid) ? 0 : -1;
			goto out;
		}

		if (lstat(path, &st) < 0) {
			ret = -1;
			goto out;
		}

		if (oi) {
			if (oi->disk_sizep)
				*oi->disk_sizep = st.st_size;
			if (oi->mtimep)
				*oi->mtimep = st.st_mtime;
		}

		ret = 0;
		goto out;
	}

	fd = git_open(path);
	if (fd < 0) {
		/* A missing file is not reported; any other failure is. */
		if (errno != ENOENT)
			error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
		ret = -1;
		goto out;
	}

	if (fstat(fd, &st)) {
		close(fd);
		ret = -1;
		goto out;
	}

	mapsize = xsize_t(st.st_size);
	if (!mapsize) {
		close(fd);
		ret = error(_("object file %s is empty"), path);
		goto out;
	}

	/* The descriptor is no longer needed once the file is mapped. */
	map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (!map) {
		ret = -1;
		goto out;
	}

	if (oi->disk_sizep)
		*oi->disk_sizep = mapsize;
	if (oi->mtimep)
		*oi->mtimep = st.st_mtime;

	/* From here on the stream has to be ended at `out`. */
	stream_to_end = &stream;

	switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
	case ULHR_OK:
		/*
		 * parse_loose_header() is handed `oi`, so point unset size and
		 * type slots at scratch storage. They are reset at `out`.
		 */
		if (!oi->sizep)
			oi->sizep = &size_scratch;
		if (!oi->typep)
			oi->typep = &type_scratch;

		if (parse_loose_header(hdr, oi) < 0) {
			ret = error(_("unable to parse %s header"), oid_to_hex(oid));
			goto corrupt;
		}

		if (*oi->typep < 0)
			die(_("invalid object type"));

		if (oi->contentp) {
			*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
			if (!*oi->contentp) {
				ret = -1;
				goto corrupt;
			}
		}

		break;
	case ULHR_BAD:
		ret = error(_("unable to unpack %s header"),
			    oid_to_hex(oid));
		goto corrupt;
	case ULHR_TOO_LONG:
		ret = error(_("header for %s too long, exceeds %d bytes"),
			    oid_to_hex(oid), MAX_HEADER_LEN);
		goto corrupt;
	}

	ret = 0;

corrupt:
	if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
		die(_("loose object %s (stored in %s) is corrupt"),
		    oid_to_hex(oid), path);

out:
	if (stream_to_end)
		git_inflate_end(stream_to_end);
	if (map)
		munmap(map, mapsize);
	if (oi) {
		/* Do not leak pointers to our stack-local scratch storage. */
		if (oi->sizep == &size_scratch)
			oi->sizep = NULL;
		if (oi->typep == &type_scratch)
			oi->typep = NULL;
		if (oi->delta_base_oid)
			oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo);
		if (!ret)
			oi->whence = OI_LOOSE;
	}

	return ret;
}
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
const void *buf, unsigned long len,
struct object_id *oid,
@@ -1667,13 +1511,13 @@ int read_pack_header(int fd, struct pack_header *header)
return 0;
}
static int for_each_file_in_obj_subdir(unsigned int subdir_nr,
struct strbuf *path,
const struct git_hash_algo *algop,
each_loose_object_fn obj_cb,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data)
int for_each_file_in_obj_subdir(unsigned int subdir_nr,
struct strbuf *path,
const struct git_hash_algo *algop,
each_loose_object_fn obj_cb,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data)
{
size_t origlen, baselen;
DIR *dir;
@@ -1758,78 +1602,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
return r;
}
/*
 * State handed to the per-object wrapper callbacks, which translate the
 * low-level iteration callbacks into an `odb_for_each_object_cb` invocation.
 */
struct for_each_object_wrapper_data {
/* Loose source whose objects are being iterated. */
struct odb_source_loose *loose;
/* Optional object info request; copied for every object when non-NULL. */
const struct object_info *request;
/* Callback invoked for every object. */
odb_for_each_object_cb cb;
/* Opaque pointer passed through to `cb`. */
void *cb_data;
};
static int for_each_object_wrapper_cb(const struct object_id *oid,
const char *path,
void *cb_data)
{
struct for_each_object_wrapper_data *data = cb_data;
if (data->request) {
struct object_info oi = *data->request;
if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0)
return -1;
return data->cb(oid, &oi, data->cb_data);
} else {
return data->cb(oid, NULL, data->cb_data);
}
}
/*
 * Per-entry oidtree callback: invoke the wrapped callback for `oid`. When an
 * object info request was given, a private copy of it is populated through
 * odb_source_read_object_info() first; a failure to do so ends the call
 * with -1.
 */
static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid,
					       void *node_data UNUSED,
					       void *cb_data)
{
	struct for_each_object_wrapper_data *wrapper = cb_data;
	struct object_info info;

	if (!wrapper->request)
		return wrapper->cb(oid, NULL, wrapper->cb_data);

	info = *wrapper->request;
	if (odb_source_read_object_info(&wrapper->loose->base, oid, &info, 0) < 0)
		return -1;

	return wrapper->cb(oid, &info, wrapper->cb_data);
}
/*
 * Invoke `cb` for every loose object of `source`. If `request` is non-NULL,
 * object info is read for each object and passed along to the callback.
 * With a prefix set in `opts`, only the matching entries of the loose object
 * cache are visited; otherwise all loose object files are walked.
 */
int odb_source_loose_for_each_object(struct odb_source *source,
				     const struct object_info *request,
				     odb_for_each_object_cb cb,
				     void *cb_data,
				     const struct odb_for_each_object_options *opts)
{
	struct odb_source_files *files = odb_source_files_downcast(source);
	struct for_each_object_wrapper_data wrapper = {
		.loose = files->loose,
		.request = request,
		.cb = cb,
		.cb_data = cb_data,
	};
	struct oidtree *cache;

	/*
	 * There are no loose promisor objects, and a non-local source has
	 * nothing to offer when only local objects are wanted.
	 */
	if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) ||
	    ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local))
		return 0;

	if (!opts->prefix)
		return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
						     NULL, NULL, &wrapper);

	cache = odb_source_loose_cache(source, opts->prefix);
	return oidtree_each(cache, opts->prefix, opts->prefix_hex_len,
			    for_each_prefixed_object_wrapper_cb, &wrapper);
}
static int count_loose_object(const struct object_id *oid UNUSED,
struct object_info *oi UNUSED,
void *payload)
@@ -1843,6 +1615,7 @@ int odb_source_loose_count_objects(struct odb_source *source,
enum odb_count_objects_flags flags,
unsigned long *out)
{
struct odb_source_files *files = odb_source_files_downcast(source);
const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2;
char *path = NULL;
DIR *dir = NULL;
@@ -1878,8 +1651,8 @@ int odb_source_loose_count_objects(struct odb_source *source,
} else {
struct odb_for_each_object_options opts = { 0 };
*out = 0;
ret = odb_source_loose_for_each_object(source, NULL, count_loose_object,
out, &opts);
ret = odb_source_for_each_object(&files->loose->base, NULL, count_loose_object,
out, &opts);
}
out:
@@ -1910,6 +1683,7 @@ int odb_source_loose_find_abbrev_len(struct odb_source *source,
unsigned min_len,
unsigned *out)
{
struct odb_source_files *files = odb_source_files_downcast(source);
struct odb_for_each_object_options opts = {
.prefix = oid,
.prefix_hex_len = min_len,
@@ -1920,54 +1694,13 @@ int odb_source_loose_find_abbrev_len(struct odb_source *source,
};
int ret;
ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb,
&data, &opts);
ret = odb_source_for_each_object(&files->loose->base, NULL, find_abbrev_len_cb,
&data, &opts);
*out = data.len;
return ret;
}
/* Loose object callback that records `oid` in the oidtree passed as `data`. */
static int append_loose_object(const struct object_id *oid,
			       const char *path UNUSED,
			       void *data)
{
	struct oidtree *tree = data;

	oidtree_insert(tree, oid, NULL);
	return 0;
}
static struct oidtree *odb_source_loose_cache(struct odb_source *source,
const struct object_id *oid)
{
struct odb_source_files *files = odb_source_files_downcast(source);
int subdir_nr = oid->hash[0];
struct strbuf buf = STRBUF_INIT;
size_t word_bits = bitsizeof(files->loose->subdir_seen[0]);
size_t word_index = subdir_nr / word_bits;
size_t mask = (size_t)1u << (subdir_nr % word_bits);
uint32_t *bitmap;
if (subdir_nr < 0 ||
(size_t) subdir_nr >= bitsizeof(files->loose->subdir_seen))
BUG("subdir_nr out of range");
bitmap = &files->loose->subdir_seen[word_index];
if (*bitmap & mask)
return files->loose->cache;
if (!files->loose->cache) {
ALLOC_ARRAY(files->loose->cache, 1);
oidtree_init(files->loose->cache);
}
strbuf_addstr(&buf, source->path);
for_each_file_in_obj_subdir(subdir_nr, &buf,
source->odb->repo->hash_algo,
append_loose_object,
NULL, NULL,
files->loose->cache);
*bitmap |= mask;
strbuf_release(&buf);
return files->loose->cache;
}
static int check_stream_oid(git_zstream *stream,
const char *hdr,
unsigned long size,

View File

@@ -6,6 +6,9 @@
#include "odb.h"
#include "odb/source-loose.h"
/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
struct index_state;
enum {
@@ -85,19 +88,13 @@ int for_each_loose_file_in_source(struct odb_source *source,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data);
/*
* Iterate through all loose objects in the given object database source and
* invoke the callback function for each of them. If an object info request is
* given, then the object info will be read for every individual object and
* passed to the callback as if `odb_source_loose_read_object_info()` was
* called for the object.
*/
int odb_source_loose_for_each_object(struct odb_source *source,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
const struct odb_for_each_object_options *opts);
int for_each_file_in_obj_subdir(unsigned int subdir_nr,
struct strbuf *path,
const struct git_hash_algo *algop,
each_loose_object_fn obj_cb,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data);
/*
* Count the number of loose objects in this source.
@@ -188,12 +185,6 @@ int read_loose_object(struct repository *repo,
void **contents,
struct object_info *oi);
int read_object_info_from_path(struct odb_source_loose *loose,
const char *path,
const struct object_id *oid,
struct object_info *oi,
enum object_info_flags flags);
enum unpack_loose_header_result {
ULHR_OK,
ULHR_BAD,
@@ -217,6 +208,9 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
unsigned long mapsize,
void *buffer,
unsigned long bufsiz);
void *unpack_loose_rest(git_zstream *stream,
void *buffer, unsigned long size,
const struct object_id *oid);
int parse_loose_header(const char *hdr, struct object_info *oi);

View File

@@ -82,7 +82,7 @@ static int odb_source_files_for_each_object(struct odb_source *source,
int ret;
if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts);
ret = odb_source_for_each_object(&files->loose->base, request, cb, cb_data, opts);
if (ret)
return ret;
}

View File

@@ -2,6 +2,7 @@
#include "abspath.h"
#include "chdir-notify.h"
#include "gettext.h"
#include "hex.h"
#include "loose.h"
#include "object-file.h"
#include "odb.h"
@@ -9,8 +10,198 @@
#include "odb/source-loose.h"
#include "odb/streaming.h"
#include "oidtree.h"
#include "repository.h"
#include "strbuf.h"
/* Loose object callback that records `oid` in the oidtree passed as `data`. */
static int append_loose_object(const struct object_id *oid,
			       const char *path UNUSED,
			       void *data)
{
	struct oidtree *tree = data;

	oidtree_insert(tree, oid, NULL);
	return 0;
}
static struct oidtree *odb_source_loose_cache(struct odb_source_loose *loose,
const struct object_id *oid)
{
int subdir_nr = oid->hash[0];
struct strbuf buf = STRBUF_INIT;
size_t word_bits = bitsizeof(loose->subdir_seen[0]);
size_t word_index = subdir_nr / word_bits;
size_t mask = (size_t)1u << (subdir_nr % word_bits);
uint32_t *bitmap;
if (subdir_nr < 0 ||
(size_t) subdir_nr >= bitsizeof(loose->subdir_seen))
BUG("subdir_nr out of range");
bitmap = &loose->subdir_seen[word_index];
if (*bitmap & mask)
return loose->cache;
if (!loose->cache) {
ALLOC_ARRAY(loose->cache, 1);
oidtree_init(loose->cache);
}
strbuf_addstr(&buf, loose->base.path);
for_each_file_in_obj_subdir(subdir_nr, &buf,
loose->base.odb->repo->hash_algo,
append_loose_object,
NULL, NULL,
loose->cache);
*bitmap |= mask;
strbuf_release(&buf);
return loose->cache;
}
/*
 * Report (as 0 or 1) whether `oid` is present in the loose object cache of
 * `loose`.
 */
static int quick_has_loose(struct odb_source_loose *loose,
			   const struct object_id *oid)
{
	struct oidtree *cache = odb_source_loose_cache(loose, oid);

	return oidtree_contains(cache, oid) ? 1 : 0;
}
/*
 * Read information about the loose object `oid` stored at `path` and fill in
 * whatever the caller requested through `oi`. `oi` may be NULL, in which case
 * only the existence of the object is checked.
 *
 * Returns 0 on success and a non-zero error value otherwise. When
 * OBJECT_INFO_DIE_IF_CORRUPT is set in `flags`, a failure to unpack or parse
 * the object dies instead of returning.
 *
 * On success with a non-NULL `oi`, `oi->whence` is set to OI_LOOSE. A
 * requested `delta_base_oid` is always cleared.
 */
static int read_object_info_from_path(struct odb_source_loose *loose,
				      const char *path,
				      const struct object_id *oid,
				      struct object_info *oi,
				      enum object_info_flags flags)
{
	int ret;
	int fd;
	unsigned long mapsize;
	void *map = NULL;
	git_zstream stream, *stream_to_end = NULL;
	char hdr[MAX_HEADER_LEN];
	unsigned long size_scratch;
	enum object_type type_scratch;
	struct stat st;

	/*
	 * If we don't care about type or size, then we don't
	 * need to look inside the object at all. Note that we
	 * do not optimize out the stat call, even if the
	 * caller doesn't care about the disk-size, since our
	 * return value implicitly indicates whether the
	 * object even exists.
	 */
	if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
		/*
		 * With OBJECT_INFO_QUICK and no stat-derived field requested,
		 * the loose object cache answers the existence question.
		 */
		if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
			ret = quick_has_loose(loose, oid) ? 0 : -1;
			goto out;
		}

		if (lstat(path, &st) < 0) {
			ret = -1;
			goto out;
		}

		if (oi) {
			if (oi->disk_sizep)
				*oi->disk_sizep = st.st_size;
			if (oi->mtimep)
				*oi->mtimep = st.st_mtime;
		}

		ret = 0;
		goto out;
	}

	fd = git_open(path);
	if (fd < 0) {
		/* A missing file is not reported; any other failure is. */
		if (errno != ENOENT)
			error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
		ret = -1;
		goto out;
	}

	if (fstat(fd, &st)) {
		close(fd);
		ret = -1;
		goto out;
	}

	mapsize = xsize_t(st.st_size);
	if (!mapsize) {
		close(fd);
		ret = error(_("object file %s is empty"), path);
		goto out;
	}

	/* The descriptor is no longer needed once the file is mapped. */
	map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (!map) {
		ret = -1;
		goto out;
	}

	if (oi->disk_sizep)
		*oi->disk_sizep = mapsize;
	if (oi->mtimep)
		*oi->mtimep = st.st_mtime;

	/* From here on the stream has to be ended at `out`. */
	stream_to_end = &stream;

	switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
	case ULHR_OK:
		/*
		 * parse_loose_header() is handed `oi`, so point unset size and
		 * type slots at scratch storage. They are reset at `out`.
		 */
		if (!oi->sizep)
			oi->sizep = &size_scratch;
		if (!oi->typep)
			oi->typep = &type_scratch;

		if (parse_loose_header(hdr, oi) < 0) {
			ret = error(_("unable to parse %s header"), oid_to_hex(oid));
			goto corrupt;
		}

		if (*oi->typep < 0)
			die(_("invalid object type"));

		if (oi->contentp) {
			*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
			if (!*oi->contentp) {
				ret = -1;
				goto corrupt;
			}
		}

		break;
	case ULHR_BAD:
		ret = error(_("unable to unpack %s header"),
			    oid_to_hex(oid));
		goto corrupt;
	case ULHR_TOO_LONG:
		ret = error(_("header for %s too long, exceeds %d bytes"),
			    oid_to_hex(oid), MAX_HEADER_LEN);
		goto corrupt;
	}

	ret = 0;

corrupt:
	if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
		die(_("loose object %s (stored in %s) is corrupt"),
		    oid_to_hex(oid), path);

out:
	if (stream_to_end)
		git_inflate_end(stream_to_end);
	if (map)
		munmap(map, mapsize);
	if (oi) {
		/* Do not leak pointers to our stack-local scratch storage. */
		if (oi->sizep == &size_scratch)
			oi->sizep = NULL;
		if (oi->typep == &type_scratch)
			oi->typep = NULL;
		if (oi->delta_base_oid)
			oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo);
		if (!ret)
			oi->whence = OI_LOOSE;
	}

	return ret;
}
static int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
struct object_info *oi,
@@ -218,6 +409,78 @@ error:
return -1;
}
/*
 * State handed to the per-object wrapper callbacks, which translate the
 * low-level iteration callbacks into an `odb_for_each_object_cb` invocation.
 */
struct for_each_object_wrapper_data {
/* Loose source whose objects are being iterated. */
struct odb_source_loose *loose;
/* Optional object info request; copied for every object when non-NULL. */
const struct object_info *request;
/* Callback invoked for every object. */
odb_for_each_object_cb cb;
/* Opaque pointer passed through to `cb`. */
void *cb_data;
};
static int for_each_object_wrapper_cb(const struct object_id *oid,
const char *path,
void *cb_data)
{
struct for_each_object_wrapper_data *data = cb_data;
if (data->request) {
struct object_info oi = *data->request;
if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0)
return -1;
return data->cb(oid, &oi, data->cb_data);
} else {
return data->cb(oid, NULL, data->cb_data);
}
}
/*
 * Per-entry oidtree callback: invoke the wrapped callback for `oid`. When an
 * object info request was given, a private copy of it is populated through
 * odb_source_read_object_info() first; a failure to do so ends the call
 * with -1.
 */
static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid,
					       void *node_data UNUSED,
					       void *cb_data)
{
	struct for_each_object_wrapper_data *wrapper = cb_data;
	struct object_info info;

	if (!wrapper->request)
		return wrapper->cb(oid, NULL, wrapper->cb_data);

	info = *wrapper->request;
	if (odb_source_read_object_info(&wrapper->loose->base, oid, &info, 0) < 0)
		return -1;

	return wrapper->cb(oid, &info, wrapper->cb_data);
}
/*
 * `for_each_object()` callback of the loose source: invoke `cb` for every
 * loose object of `source`. If `request` is non-NULL, object info is read
 * for each object and passed along to the callback. With a prefix set in
 * `opts`, only the matching entries of the loose object cache are visited;
 * otherwise all loose object files are walked.
 */
static int odb_source_loose_for_each_object(struct odb_source *source,
					    const struct object_info *request,
					    odb_for_each_object_cb cb,
					    void *cb_data,
					    const struct odb_for_each_object_options *opts)
{
	struct odb_source_loose *loose = odb_source_loose_downcast(source);
	struct for_each_object_wrapper_data wrapper = {
		.loose = loose,
		.request = request,
		.cb = cb,
		.cb_data = cb_data,
	};
	struct oidtree *cache;

	/*
	 * There are no loose promisor objects, and a non-local source has
	 * nothing to offer when only local objects are wanted.
	 */
	if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) ||
	    ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local))
		return 0;

	if (!opts->prefix)
		return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
						     NULL, NULL, &wrapper);

	cache = odb_source_loose_cache(loose, opts->prefix);
	return oidtree_each(cache, opts->prefix, opts->prefix_hex_len,
			    for_each_prefixed_object_wrapper_cb, &wrapper);
}
static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
{
oidtree_clear(loose->cache);
@@ -273,6 +536,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
loose->base.reprepare = odb_source_loose_reprepare;
loose->base.read_object_info = odb_source_loose_read_object_info;
loose->base.read_object_stream = odb_source_loose_read_object_stream;
loose->base.for_each_object = odb_source_loose_for_each_object;
if (!is_absolute_path(loose->base.path))
chdir_notify_register(NULL, odb_source_loose_reparent, loose);