#include "git-compat-util.h"
#include "abspath.h"
#include "chdir-notify.h"
#include "dir.h"
#include "git-zlib.h"
#include "mergesort.h"
#include "midx.h"
#include "odb/source-packed.h"
#include "odb/streaming.h"
#include "packfile.h"

/*
 * Look up `oid` in the packed source and fill in `e` on success.
 *
 * The source is prepared first. The multi-pack index, if present, is
 * consulted before the individual packs; packs that belong to a
 * multi-pack index are skipped in the per-pack loop. A pack that yields
 * a hit is passed to packfile_list_prepend() unless `skip_mru_updates`
 * is set on the store.
 *
 * Returns 1 if the object was found, 0 otherwise.
 */
static int find_pack_entry(struct odb_source_packed *store,
			   const struct object_id *oid,
			   struct pack_entry *e)
{
	struct packfile_list_entry *l;

	odb_source_packed_prepare(store);
	if (store->midx && fill_midx_entry(store->midx, oid, e))
		return 1;

	for (l = store->packs.head; l; l = l->next) {
		struct packed_git *p = l->pack;

		if (!p->multi_pack_index && packfile_fill_entry(p, oid, e)) {
			if (!store->skip_mru_updates)
				packfile_list_prepend(&store->packs, p);
			return 1;
		}
	}

	return 0;
}

/*
 * Read object information for `oid` from the packed source.
 *
 * Returns 1 if the object is not present in this source, 0 on success
 * (including the case where `oi` is NULL and only existence was
 * requested), and -1 if reading the packed object info failed, in which
 * case the object is marked bad for that pack.
 */
static int odb_source_packed_read_object_info(struct odb_source *source,
					      const struct object_id *oid,
					      struct object_info *oi,
					      enum object_info_flags flags)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct pack_entry e;
	int ret;

	/*
	 * In case the first read didn't surface the object, we have to reload
	 * packfiles. This may cause us to discover new packfiles that have
	 * been added since the last time we have prepared the packfile store.
	 */
	if (flags & OBJECT_INFO_SECOND_READ)
		odb_source_reprepare(source);

	if (!find_pack_entry(packed, oid, &e))
		return 1;

	/*
	 * We know that the caller doesn't actually need the
	 * information below, so return early.
	 */
	if (!oi)
		return 0;

	ret = packed_object_info(e.p, e.offset, oi);
	if (ret < 0) {
		mark_bad_packed_object(e.p, oid);
		return -1;
	}

	return 0;
}

/*
 * Open a read stream for `oid`. Returns -1 if the object is not found in
 * this source; otherwise returns whatever packfile_read_object_stream()
 * returns for the located pack entry.
 */
static int odb_source_packed_read_object_stream(struct odb_read_stream **out,
						struct odb_source *source,
						const struct object_id *oid)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct pack_entry e;

	if (!find_pack_entry(packed, oid, &e))
		return -1;

	return packfile_read_object_stream(out, oid, e.p, e.offset);
}

/*
 * State shared by the object iteration helpers below: the store being
 * iterated, the optional object info template requested by the caller,
 * and the caller's callback together with its payload.
 */
struct odb_source_packed_for_each_object_wrapper_data {
	struct odb_source_packed *store;
	const struct object_info *request;
	odb_for_each_object_cb cb;
	void *cb_data;
};

/*
 * Per-object callback handed to for_each_object_in_pack(). `cb_data` is a
 * `struct odb_source_packed_for_each_object_wrapper_data`.
 *
 * When object info was requested, a copy of the request is filled in for
 * the object at `index_pos` before the user callback runs; a failure to
 * do so marks the object as bad and returns -1. Without a request the
 * user callback is invoked with a NULL object info.
 */
static int odb_source_packed_for_each_object_wrapper(const struct object_id *oid,
						     struct packed_git *pack,
						     uint32_t index_pos,
						     void *cb_data)
{
	struct odb_source_packed_for_each_object_wrapper_data *data = cb_data;

	if (data->request) {
		off_t offset = nth_packed_object_offset(pack, index_pos);
		struct object_info oi = *data->request;

		if (packed_object_info_with_index_pos(pack, offset,
						      &index_pos, &oi) < 0) {
			mark_bad_packed_object(pack, oid);
			return -1;
		}

		return data->cb(oid, &oi, data->cb_data);
	} else {
		return data->cb(oid, NULL, data->cb_data);
	}
}

/*
 * Compare the first `len` hex digits of the binary hashes `a` and `b`.
 * Two hex digits make up one byte; an odd trailing digit is compared via
 * the high nibble of the following byte only.
 *
 * Returns 1 if the prefixes are equal (trivially so for `len == 0`),
 * 0 otherwise.
 */
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
{
	/*
	 * Test `len` before consuming a byte: with a post-tested loop a
	 * length of 0 or 1 would compare a full byte and then wrap the
	 * unsigned counter around, running far past the end of the hashes.
	 */
	while (len > 1) {
		if (*a != *b)
			return 0;
		a++;
		b++;
		len -= 2;
	}
	if (len)
		if ((*a ^ *b) & 0xf0)
			return 0;
	return 1;
}

/*
 * Invoke the callback in `data` for every object in the multi-pack index
 * chain starting at `m` (following `base_midx`) whose object ID starts
 * with `opts->prefix`, limited to `opts->prefix_hex_len` hex digits.
 *
 * Returns 0 once all layers were walked, or the first non-zero value
 * returned either by reading the object info or by the callback.
 */
static int for_each_prefixed_object_in_midx(
	struct odb_source_packed *store,
	struct multi_pack_index *m,
	const struct odb_for_each_object_options *opts,
	struct odb_source_packed_for_each_object_wrapper_data *data)
{
	int ret;

	for (; m; m = m->base_midx) {
		uint32_t num, i, first = 0;
		/* Never compare more hex digits than a full object ID has. */
		int len = opts->prefix_hex_len > m->source->base.odb->repo->hash_algo->hexsz ?
			m->source->base.odb->repo->hash_algo->hexsz : opts->prefix_hex_len;

		if (!m->num_objects)
			continue;

		num = m->num_objects + m->num_objects_in_base;

		bsearch_one_midx(opts->prefix, m, &first);

		/*
		 * At this point, "first" is the location of the lowest
		 * object with an object name that could match "opts->prefix".
		 * See if we have 0, 1 or more objects that actually match(es).
		 */
		for (i = first; i < num; i++) {
			const struct object_id *current = NULL;
			struct object_id oid;

			current = nth_midxed_object_oid(&oid, m, i);

			if (!match_hash(len, opts->prefix->hash, current->hash))
				break;

			if (data->request) {
				struct object_info oi = *data->request;

				ret = odb_source_read_object_info(&store->base, current,
								  &oi, 0);
				if (ret)
					goto out;

				ret = data->cb(&oid, &oi, data->cb_data);
				if (ret)
					goto out;
			} else {
				ret = data->cb(&oid, NULL, data->cb_data);
				if (ret)
					goto out;
			}
		}
	}

	ret = 0;

out:
	return ret;
}

/*
 * Invoke the callback in `data` for every object in pack `p` whose object
 * ID starts with `opts->prefix`, limited to `opts->prefix_hex_len` hex
 * digits.
 *
 * Returns 0 once all candidates were visited, or the first non-zero value
 * returned either by reading the object info or by the callback.
 */
static int for_each_prefixed_object_in_pack(
	struct odb_source_packed *store,
	struct packed_git *p,
	const struct odb_for_each_object_options *opts,
	struct odb_source_packed_for_each_object_wrapper_data *data)
{
	uint32_t num, i, first = 0;
	/* Never compare more hex digits than a full object ID has. */
	int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ?
		p->repo->hash_algo->hexsz : opts->prefix_hex_len;
	int ret;

	num = p->num_objects;
	bsearch_pack(opts->prefix, p, &first);

	/*
	 * At this point, "first" is the location of the lowest object
	 * with an object name that could match "opts->prefix". See if we
	 * have 0, 1 or more objects that actually match(es).
	 */
	for (i = first; i < num; i++) {
		struct object_id oid;

		nth_packed_object_id(&oid, p, i);
		if (!match_hash(len, opts->prefix->hash, oid.hash))
			break;

		if (data->request) {
			struct object_info oi = *data->request;

			ret = odb_source_read_object_info(&store->base, &oid, &oi, 0);
			if (ret)
				goto out;

			ret = data->cb(&oid, &oi, data->cb_data);
			if (ret)
				goto out;
		} else {
			ret = data->cb(&oid, NULL, data->cb_data);
			if (ret)
				goto out;
		}
	}

	ret = 0;

out:
	return ret;
}

/*
 * Prefix-restricted object iteration: visits matching objects in the
 * multi-pack index first and then in every pack that is not part of a
 * multi-pack index.
 *
 * Iteration flags are not supported in this mode. MRU updates are
 * disabled for the duration of the walk and re-enabled on every exit
 * path. Packs whose index cannot be opened are skipped; if the walk
 * otherwise succeeded, such a failure turns the result into -1.
 */
static int odb_source_packed_for_each_prefixed_object(
	struct odb_source_packed *store,
	const struct odb_for_each_object_options *opts,
	struct odb_source_packed_for_each_object_wrapper_data *data)
{
	struct packfile_list_entry *e;
	struct multi_pack_index *m;
	bool pack_errors = false;
	int ret;

	if (opts->flags)
		BUG("flags unsupported");

	store->skip_mru_updates = true;

	m = get_multi_pack_index(store);
	if (m) {
		ret = for_each_prefixed_object_in_midx(store, m, opts, data);
		if (ret)
			goto out;
	}

	for (e = packfile_store_get_packs(store); e; e = e->next) {
		if (e->pack->multi_pack_index)
			continue;

		if (open_pack_index(e->pack)) {
			pack_errors = true;
			continue;
		}

		if (!e->pack->num_objects)
			continue;

		ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data);
		if (ret)
			goto out;
	}

	ret = 0;

out:
	store->skip_mru_updates = false;
	if (!ret && pack_errors)
		ret = -1;
	return ret;
}

/*
 * Iterate over the objects of the packed source and call `cb` for each.
 *
 * With `opts->prefix` set, the work is delegated to the prefix-restricted
 * walk. Otherwise every pack is visited, subject to the local-only,
 * promisor-only and kept-pack filters in `opts->flags`. MRU updates are
 * disabled while iterating. Packs whose index cannot be opened are
 * skipped; if the walk otherwise succeeded, such a failure turns the
 * result into -1. A non-zero return from the per-pack iteration stops
 * the walk and is returned as is.
 */
static int odb_source_packed_for_each_object(struct odb_source *source,
					     const struct object_info *request,
					     odb_for_each_object_cb cb,
					     void *cb_data,
					     const struct odb_for_each_object_options *opts)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct odb_source_packed_for_each_object_wrapper_data data = {
		.store = packed,
		.request = request,
		.cb = cb,
		.cb_data = cb_data,
	};
	struct packfile_list_entry *e;
	int pack_errors = 0, ret;

	if (opts->prefix)
		return odb_source_packed_for_each_prefixed_object(packed, opts, &data);

	packed->skip_mru_updates = true;

	for (e = packfile_store_get_packs(packed); e; e = e->next) {
		struct packed_git *p = e->pack;

		if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
			continue;
		if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
		    !p->pack_promisor)
			continue;
		if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
		    p->pack_keep_in_core)
			continue;
		if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
		    p->pack_keep)
			continue;
		if (open_pack_index(p)) {
			pack_errors = 1;
			continue;
		}

		ret = for_each_object_in_pack(p, odb_source_packed_for_each_object_wrapper,
					      &data, opts->flags);
		if (ret)
			goto out;
	}

	ret = 0;

out:
	packed->skip_mru_updates = false;
	if (!ret && pack_errors)
		ret = -1;
	return ret;
}

/*
 * Sum up the object counts of the multi-pack index (including its base
 * layers' count) and of every pack that is not part of a multi-pack
 * index, and store the total in `*out`.
 *
 * Returns 0 on success. Returns -1 without touching `*out` if a pack
 * index cannot be opened. `flags` is not used by this backend.
 */
static int odb_source_packed_count_objects(struct odb_source *source,
					   enum odb_count_objects_flags flags UNUSED,
					   unsigned long *out)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct packfile_list_entry *e;
	struct multi_pack_index *m;
	unsigned long count = 0;
	int ret;

	m = get_multi_pack_index(packed);
	if (m)
		count += m->num_objects + m->num_objects_in_base;

	for (e = packfile_store_get_packs(packed); e; e = e->next) {
		if (e->pack->multi_pack_index)
			continue;
		if (open_pack_index(e->pack)) {
			ret = -1;
			goto out;
		}

		count += e->pack->num_objects;
	}

	*out = count;
	ret = 0;

out:
	return ret;
}

/*
 * Raise `*out` to one more than the number of leading hex digits shared
 * by `a` and `b`, provided the two IDs are not identical over the full
 * hex length and the shared prefix is at least `*out` digits long.
 * Always returns 0.
 */
static int extend_abbrev_len(const struct object_id *a,
			     const struct object_id *b,
			     unsigned *out)
{
	unsigned len = oid_common_prefix_hexlen(a, b);
	if (len != hash_algos[a->algo].hexsz && len >= *out)
		*out = len + 1;
	return 0;
}

/*
 * Compute the abbreviation length needed to tell `oid` apart from its
 * neighbours in the multi-pack index chain starting at `m`. The result,
 * which is never smaller than `min_len`, is stored in `*out`.
 */
static void find_abbrev_len_for_midx(struct multi_pack_index *m,
				     const struct object_id *oid,
				     unsigned min_len,
				     unsigned *out)
{
	unsigned len = min_len;

	for (; m; m = m->base_midx) {
		int match = 0;
		uint32_t num, first = 0;
		struct object_id found_oid;

		if (!m->num_objects)
			continue;

		num = m->num_objects + m->num_objects_in_base;
		match = bsearch_one_midx(oid, m, &first);

		/*
		 * first is now the position in the multi-pack index where
		 * we would insert the object ID if it does not exist (or
		 * the position of the object ID if it does exist). Hence,
		 * we consider a maximum of two objects nearby for the
		 * abbreviation length.
		 */
		if (!match) {
			if (nth_midxed_object_oid(&found_oid, m, first))
				extend_abbrev_len(&found_oid, oid, &len);
		} else if (first < num - 1) {
			if (nth_midxed_object_oid(&found_oid, m, first + 1))
				extend_abbrev_len(&found_oid, oid, &len);
		}
		if (first > 0) {
			if (nth_midxed_object_oid(&found_oid, m, first - 1))
				extend_abbrev_len(&found_oid, oid, &len);
		}
	}

	*out = len;
}

/*
 * Compute the abbreviation length needed to tell `oid` apart from its
 * neighbours in pack `p`. The result, which is never smaller than
 * `min_len`, is stored in `*out`.
 */
static void find_abbrev_len_for_pack(struct packed_git *p,
				     const struct object_id *oid,
				     unsigned min_len,
				     unsigned *out)
{
	int match;
	uint32_t num, first = 0;
	struct object_id found_oid;
	unsigned len = min_len;

	num = p->num_objects;
	match = bsearch_pack(oid, p, &first);

	/*
	 * first is now the position in the packfile where we would insert
	 * the object ID if it does not exist (or the position of the object
	 * ID if it does exist). Hence, we consider a maximum of two objects
	 * nearby for the abbreviation length.
	 */
	if (!match) {
		if (!nth_packed_object_id(&found_oid, p, first))
			extend_abbrev_len(&found_oid, oid, &len);
	} else if (first < num - 1) {
		if (!nth_packed_object_id(&found_oid, p, first + 1))
			extend_abbrev_len(&found_oid, oid, &len);
	}
	if (first > 0) {
		if (!nth_packed_object_id(&found_oid, p, first - 1))
			extend_abbrev_len(&found_oid, oid, &len);
	}

	*out = len;
}

/*
 * Determine the abbreviation length for `oid` across the multi-pack index
 * and all packs not covered by it. `min_len` is carried from one lookup
 * to the next, so it can only grow. Packs whose index cannot be opened
 * or that contain no objects are ignored. Always returns 0.
 */
static int odb_source_packed_find_abbrev_len(struct odb_source *source,
					     const struct object_id *oid,
					     unsigned min_len,
					     unsigned *out)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct packfile_list_entry *e;
	struct multi_pack_index *m;

	m = get_multi_pack_index(packed);
	if (m)
		find_abbrev_len_for_midx(m, oid, min_len, &min_len);

	for (e = packfile_store_get_packs(packed); e; e = e->next) {
		if (e->pack->multi_pack_index)
			continue;
		if (open_pack_index(e->pack) || !e->pack->num_objects)
			continue;

		find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len);
	}

	*out = min_len;
	return 0;
}

/*
 * Refresh the timestamp of the pack that contains `oid` via utime().
 *
 * Returns 1 if the containing pack was freshened (now or by an earlier
 * call, as recorded in its `freshened` flag). Returns 0 if the object is
 * not found, if it lives in a cruft pack, or if utime() fails.
 */
static int odb_source_packed_freshen_object(struct odb_source *source,
					    const struct object_id *oid)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct pack_entry e;

	if (!find_pack_entry(packed, oid, &e))
		return 0;
	if (e.p->is_cruft)
		return 0;
	if (e.p->freshened)
		return 1;
	if (utime(e.p->pack_name, NULL))
		return 0;
	e.p->freshened = 1;
	return 1;
}

/* Writing single objects is not supported by this backend. */
static int odb_source_packed_write_object(struct odb_source *source UNUSED,
					  const void *buf UNUSED,
					  unsigned long len UNUSED,
					  enum object_type type UNUSED,
					  struct object_id *oid UNUSED,
					  struct object_id *compat_oid UNUSED,
					  unsigned flags UNUSED)
{
	return error("packed backend cannot write objects");
}

/* Writing object streams is not supported by this backend. */
static int odb_source_packed_write_object_stream(struct odb_source *source UNUSED,
						 struct odb_write_stream *stream UNUSED,
						 size_t len UNUSED,
						 struct object_id *oid UNUSED)
{
	return error("packed backend cannot write object streams");
}

/* Transactions are not supported by this backend. */
static int odb_source_packed_begin_transaction(struct odb_source *source UNUSED,
					       struct odb_transaction **out UNUSED)
{
	return error("packed backend cannot begin transactions");
}

/* This backend reports no alternates: `out` is left alone, returns 0. */
static int odb_source_packed_read_alternates(struct odb_source *source UNUSED,
					     struct strvec *out UNUSED)
{
	return 0;
}

/* Writing alternates is not supported by this backend. */
static int odb_source_packed_write_alternate(struct odb_source *source UNUSED,
					     const char *alternate UNUSED)
{
	return error("packed backend cannot write alternates");
}

/*
 * Optional hook used to report unexpected files found in a pack
 * directory. Garbage reporting is skipped entirely while this is NULL.
 */
void (*report_garbage)(unsigned seen_bits, const char *path);

/*
 * Report the entries `first` (inclusive) to `last` (exclusive) of `list`
 * as garbage, unless `seen_bits` shows that both the pack and its index
 * were seen for this group.
 */
static void report_helper(const struct string_list *list,
			  int seen_bits, int first, int last)
{
	if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
		return;

	for (; first < last; first++)
		report_garbage(seen_bits, list->items[first].string);
}

/*
 * Sort `list` and group its paths by everything up to and including the
 * last '.'. A group is reported as garbage unless it contains both a
 * "pack" and an "idx" file. Paths without any '.' are reported
 * individually as PACKDIR_FILE_GARBAGE.
 */
static void report_pack_garbage(struct string_list *list)
{
	int baselen = -1, first = 0, seen_bits = 0;

	if (!report_garbage)
		return;

	string_list_sort(list);

	for (size_t i = 0; i < list->nr; i++) {
		const char *path = list->items[i].string;
		/* A different base name closes the group collected so far. */
		if (baselen != -1 &&
		    strncmp(path, list->items[first].string, baselen)) {
			report_helper(list, seen_bits, first, i);
			baselen = -1;
			seen_bits = 0;
		}
		if (baselen == -1) {
			const char *dot = strrchr(path, '.');
			if (!dot) {
				report_garbage(PACKDIR_FILE_GARBAGE, path);
				continue;
			}
			baselen = dot - path + 1;
			first = i;
		}
		/*
		 * NOTE(review): 1 and 2 presumably correspond to
		 * PACKDIR_FILE_PACK and PACKDIR_FILE_IDX as tested in
		 * report_helper() - confirm against their definitions.
		 */
		if (!strcmp(path + baselen, "pack"))
			seen_bits |= 1;
		else if (!strcmp(path + baselen, "idx"))
			seen_bits |= 2;
	}

	/*
	 * Flush the trailing group only if one is actually open. When the
	 * last entry had no '.', `first` still refers to a group that was
	 * already handled and must not be reported a second time.
	 */
	if (baselen != -1)
		report_helper(list, seen_bits, first, list->nr);
}

/*
 * Payload for prepare_pack(): the source that packs are loaded into and
 * the list collecting candidate garbage file names.
 */
struct prepare_pack_data {
	struct odb_source_packed *source;
	struct string_list *garbage;
};

/*
 * Callback for for_each_file_in_pack_dir().
 *
 * Loads the pack for every ".idx" file that is not already covered by
 * the source's multi-pack index. If garbage reporting is enabled, files
 * with a known pack-related suffix are queued in `data->garbage` for
 * later grouping, multi-pack-index files are ignored, and anything else
 * is reported as garbage right away.
 */
static void prepare_pack(const char *full_name, size_t full_name_len,
			 const char *file_name, void *_data)
{
	struct prepare_pack_data *data = (struct prepare_pack_data *)_data;
	size_t base_len = full_name_len;

	if (strip_suffix_mem(full_name, &base_len, ".idx") &&
	    !(data->source->midx &&
	      midx_contains_pack(data->source->midx, file_name))) {
		char *trimmed_path = xstrndup(full_name, full_name_len);
		packfile_store_load_pack(data->source,
					 trimmed_path, data->source->base.local);
		free(trimmed_path);
	}

	if (!report_garbage)
		return;

	if (!strcmp(file_name, "multi-pack-index") ||
	    !strcmp(file_name, "multi-pack-index.d"))
		return;
	if (starts_with(file_name, "multi-pack-index") &&
	    (ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev")))
		return;
	if (ends_with(file_name, ".idx") ||
	    ends_with(file_name, ".rev") ||
	    ends_with(file_name, ".pack") ||
	    ends_with(file_name, ".bitmap") ||
	    ends_with(file_name, ".keep") ||
	    ends_with(file_name, ".promisor") ||
	    ends_with(file_name, ".mtimes"))
		string_list_append(data->garbage, full_name);
	else
		report_garbage(PACKDIR_FILE_GARBAGE, full_name);
}

/*
 * Scan the pack directory below the source's path, loading packs and
 * reporting garbage along the way. The temporary garbage list is
 * released before returning.
 */
static void prepare_packed_git_one(struct odb_source_packed *source)
{
	struct string_list garbage = STRING_LIST_INIT_DUP;
	struct prepare_pack_data data = {
		.source = source,
		.garbage = &garbage,
	};

	for_each_file_in_pack_dir(source->base.path, prepare_pack, &data);

	report_pack_garbage(data.garbage);
	string_list_clear(data.garbage, 0);
}

DEFINE_LIST_SORT(static, sort_packs, struct packfile_list_entry, next);
static int sort_pack(const struct packfile_list_entry *a, const struct packfile_list_entry *b) { int st; /* * Local packs tend to contain objects specific to our * variant of the project than remote ones. In addition, * remote ones could be on a network mounted filesystem. * Favor local ones for these reasons. */ st = a->pack->pack_local - b->pack->pack_local; if (st) return -st; /* * Younger packs tend to contain more recent objects, * and more recent objects tend to get accessed more * often. */ if (a->pack->mtime < b->pack->mtime) return 1; else if (a->pack->mtime == b->pack->mtime) return 0; return -1; } void odb_source_packed_prepare(struct odb_source_packed *source) { if (source->initialized) return; prepare_multi_pack_index_one(source); prepare_packed_git_one(source); sort_packs(&source->packs.head, sort_pack); for (struct packfile_list_entry *e = source->packs.head; e; e = e->next) if (!e->next) source->packs.tail = e; source->initialized = true; } static void odb_source_packed_reprepare(struct odb_source *source) { struct odb_source_packed *packed = odb_source_packed_downcast(source); packed->initialized = false; odb_source_packed_prepare(packed); } static void odb_source_packed_reparent(const char *name UNUSED, const char *old_cwd, const char *new_cwd, void *cb_data) { struct odb_source_packed *packed = cb_data; char *path = reparent_relative_path(old_cwd, new_cwd, packed->base.path); free(packed->base.path); packed->base.path = path; } static void odb_source_packed_close(struct odb_source *source) { struct odb_source_packed *packed = odb_source_packed_downcast(source); for (struct packfile_list_entry *e = packed->packs.head; e; e = e->next) { if (e->pack->do_not_close) BUG("want to close pack marked 'do-not-close'"); close_pack(e->pack); } if (packed->midx) close_midx(packed->midx); packed->midx = NULL; } static void odb_source_packed_free(struct odb_source *source) { struct odb_source_packed *packed = odb_source_packed_downcast(source); 
chdir_notify_unregister(NULL, odb_source_packed_reparent, packed); for (struct packfile_list_entry *e = packed->packs.head; e; e = e->next) free(e->pack); packfile_list_clear(&packed->packs); strmap_clear(&packed->packs_by_path, 0); odb_source_release(&packed->base); free(packed); } struct odb_source_packed *odb_source_packed_new(struct object_database *odb, const char *path, bool local) { struct odb_source_packed *packed; CALLOC_ARRAY(packed, 1); odb_source_init(&packed->base, odb, ODB_SOURCE_PACKED, path, local); strmap_init(&packed->packs_by_path); packed->base.free = odb_source_packed_free; packed->base.close = odb_source_packed_close; packed->base.reprepare = odb_source_packed_reprepare; packed->base.read_object_info = odb_source_packed_read_object_info; packed->base.read_object_stream = odb_source_packed_read_object_stream; packed->base.for_each_object = odb_source_packed_for_each_object; packed->base.count_objects = odb_source_packed_count_objects; packed->base.find_abbrev_len = odb_source_packed_find_abbrev_len; packed->base.freshen_object = odb_source_packed_freshen_object; packed->base.write_object = odb_source_packed_write_object; packed->base.write_object_stream = odb_source_packed_write_object_stream; packed->base.begin_transaction = odb_source_packed_begin_transaction; packed->base.read_alternates = odb_source_packed_read_alternates; packed->base.write_alternate = odb_source_packed_write_alternate; if (!is_absolute_path(path)) chdir_notify_register(NULL, odb_source_packed_reparent, packed); return packed; }