From 66387793677c660bbafeb8bbf1ec2ac914e6a1d0 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 21 Jan 2025 04:34:10 +0100 Subject: [PATCH 1/3] refs: mark `ref_transaction_update_reflog()` as static The `ref_transaction_update_reflog()` function is only used within 'refs.c', so mark it as static. Reported-by: Junio C Hamano Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs.c | 22 +++++++++++++++------- refs.h | 14 -------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/refs.c b/refs.c index a5851a7de0..4c67c95fbe 100644 --- a/refs.c +++ b/refs.c @@ -1270,13 +1270,21 @@ int ref_transaction_update(struct ref_transaction *transaction, return 0; } -int ref_transaction_update_reflog(struct ref_transaction *transaction, - const char *refname, - const struct object_id *new_oid, - const struct object_id *old_oid, - const char *committer_info, unsigned int flags, - const char *msg, unsigned int index, - struct strbuf *err) +/* + * Similar to`ref_transaction_update`, but this function is only for adding + * a reflog update. Supports providing custom committer information. The index + * field can be utiltized to order updates as desired. When not used, the + * updates default to being ordered by refname. + */ +static int ref_transaction_update_reflog(struct ref_transaction *transaction, + const char *refname, + const struct object_id *new_oid, + const struct object_id *old_oid, + const char *committer_info, + unsigned int flags, + const char *msg, + unsigned int index, + struct strbuf *err) { struct ref_update *update; diff --git a/refs.h b/refs.h index b0dfc65ed2..a5bedf48cf 100644 --- a/refs.h +++ b/refs.h @@ -727,20 +727,6 @@ int ref_transaction_update(struct ref_transaction *transaction, unsigned int flags, const char *msg, struct strbuf *err); -/* - * Similar to`ref_transaction_update`, but this function is only for adding - * a reflog update. Supports providing custom committer information. The index - * field can be utiltized to order updates as desired. When not used, the - * updates default to being ordered by refname. - */ -int ref_transaction_update_reflog(struct ref_transaction *transaction, - const char *refname, - const struct object_id *new_oid, - const struct object_id *old_oid, - const char *committer_info, unsigned int flags, - const char *msg, unsigned int index, - struct strbuf *err); - /* * Add a reference creation to transaction. new_oid is the value that * the reference should have after the update; it must not be From a89e12dc164f43f9e728ac096db02bd7e403bdbc Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 21 Jan 2025 04:34:11 +0100 Subject: [PATCH 2/3] refs: use 'uint64_t' for 'ref_update.index' The 'ref_update.index' variable is used to store an index for a given reference update. This index is used to order the updates in a predetermined order, while the default ordering is alphabetical as per the refname. For large repositories with millions of references, it should be safer to use 'uint64_t'. Let's do that. This also is applied for all other code sections where we store 'index' and pass it around. Reported-by: brian m. carlson Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs.c | 4 ++-- refs/refs-internal.h | 4 ++-- refs/reftable-backend.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/refs.c b/refs.c index 4c67c95fbe..c65a554a73 100644 --- a/refs.c +++ b/refs.c @@ -1283,7 +1283,7 @@ static int ref_transaction_update_reflog(struct ref_transaction *transaction, const char *committer_info, unsigned int flags, const char *msg, - unsigned int index, + uint64_t index, struct strbuf *err) { struct ref_update *update; @@ -2731,7 +2731,7 @@ done: } struct reflog_migration_data { - unsigned int index; + uint64_t index; const char *refname; struct ref_store *old_refs; struct ref_transaction *transaction; diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 2aaff91ab4..ecb25f6d20 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -120,7 +120,7 @@ struct ref_update { * when migrating reflogs and we want to ensure we carry over the * same order. */ - unsigned int index; + uint64_t index; /* * If this ref_update was split off of a symref update via @@ -203,7 +203,7 @@ struct ref_transaction { enum ref_transaction_state state; void *backend_data; unsigned int flags; - unsigned int max_index; + uint64_t max_index; }; /* diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 68db2baa8f..628bfa0c5b 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -852,7 +852,7 @@ struct write_transaction_table_arg { size_t updates_nr; size_t updates_alloc; size_t updates_expected; - unsigned int max_index; + uint64_t max_index; }; struct reftable_transaction_data { From 148560f1e3bbdb9cf27533bed43cdf116bf61d01 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Tue, 21 Jan 2025 04:34:12 +0100 Subject: [PATCH 3/3] reftable: prevent 'update_index' changes after adding records The function `reftable_writer_set_limits()` allows updating the 'min_update_index' and 'max_update_index' of a reftable writer. These values are written to both the writer's header and footer. Since the header is written during the first block write, any subsequent changes to the update index would create a mismatch between the header and footer values. The footer would contain the newer values while the header retained the original ones. To fix this bug, prevent callers from updating these values after any record is written. To do this, modify the function to return an error whenever the limits are modified after any record adds. Check for record adds within `reftable_writer_set_limits()` by checking the `last_key` variable, which is set whenever a new record is added. Modify all callers of the function to anticipate a return type and handle it accordingly. Helped-by: Patrick Steinhardt Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 20 +++++++++++++++----- reftable/reftable-error.h | 1 + reftable/reftable-writer.h | 24 ++++++++++++++---------- reftable/stack.c | 6 ++++-- reftable/writer.c | 13 +++++++++++-- t/unit-tests/t-reftable-stack.c | 8 +++++--- 6 files changed, 50 insertions(+), 22 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 628bfa0c5b..b94a6abae0 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1317,7 +1317,9 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data * multiple entries. Each entry will contain a different update_index, * so set the limits accordingly. */ - reftable_writer_set_limits(writer, ts, ts + arg->max_index); + ret = reftable_writer_set_limits(writer, ts, ts + arg->max_index); + if (ret < 0) + goto done; for (i = 0; i < arg->updates_nr; i++) { struct reftable_transaction_update *tx_update = &arg->updates[i]; @@ -1640,7 +1642,9 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) deletion_ts = creation_ts = reftable_stack_next_update_index(arg->stack); if (arg->delete_old) creation_ts++; - reftable_writer_set_limits(writer, deletion_ts, creation_ts); + ret = reftable_writer_set_limits(writer, deletion_ts, creation_ts); + if (ret < 0) + goto done; /* * Add the new reference. If this is a rename then we also delete the @@ -2160,7 +2164,9 @@ static int write_reflog_existence_table(struct reftable_writer *writer, if (ret <= 0) goto done; - reftable_writer_set_limits(writer, ts, ts); + ret = reftable_writer_set_limits(writer, ts, ts); + if (ret < 0) + goto done; /* * The existence entry has both old and new object ID set to the @@ -2219,7 +2225,9 @@ static int write_reflog_delete_table(struct reftable_writer *writer, void *cb_da uint64_t ts = reftable_stack_next_update_index(arg->stack); int ret; - reftable_writer_set_limits(writer, ts, ts); + ret = reftable_writer_set_limits(writer, ts, ts); + if (ret < 0) + goto out; ret = reftable_stack_init_log_iterator(arg->stack, &it); if (ret < 0) @@ -2295,7 +2303,9 @@ static int write_reflog_expiry_table(struct reftable_writer *writer, void *cb_da if (arg->records[i].value_type == REFTABLE_LOG_UPDATE) live_records++; - reftable_writer_set_limits(writer, ts, ts); + ret = reftable_writer_set_limits(writer, ts, ts); + if (ret < 0) + return ret; if (!is_null_oid(&arg->update_oid)) { struct reftable_ref_record ref = {0}; diff --git a/reftable/reftable-error.h b/reftable/reftable-error.h index f404826562..a7e33d964d 100644 --- a/reftable/reftable-error.h +++ b/reftable/reftable-error.h @@ -30,6 +30,7 @@ enum reftable_error { /* Misuse of the API: * - on writing a record with NULL refname. + * - on writing a record before setting the writer limits. * - on writing a reftable_ref_record outside the table limits * - on writing a ref or log record before the stack's * next_update_inde*x diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index e4fc953788..46776578ef 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -109,17 +109,21 @@ int reftable_writer_new(struct reftable_writer **out, int (*flush_func)(void *), void *writer_arg, const struct reftable_write_options *opts); -/* Set the range of update indices for the records we will add. When writing a - table into a stack, the min should be at least - reftable_stack_next_update_index(), or REFTABLE_API_ERROR is returned. - - For transactional updates to a stack, typically min==max, and the - update_index can be obtained by inspeciting the stack. When converting an - existing ref database into a single reftable, this would be a range of - update-index timestamps. +/* + * Set the range of update indices for the records we will add. When writing a + * table into a stack, the min should be at least + * reftable_stack_next_update_index(), or REFTABLE_API_ERROR is returned. + * + * For transactional updates to a stack, typically min==max, and the + * update_index can be obtained by inspeciting the stack. When converting an + * existing ref database into a single reftable, this would be a range of + * update-index timestamps. + * + * The function should be called before adding any records to the writer. If not + * it will fail with REFTABLE_API_ERROR. */ -void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, - uint64_t max); +int reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, + uint64_t max); /* Add a reftable_ref_record. The record should have names that come after diff --git a/reftable/stack.c b/reftable/stack.c index c33979536e..95938ec1ef 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -1045,8 +1045,10 @@ static int stack_write_compact(struct reftable_stack *st, for (size_t i = first; i <= last; i++) st->stats.bytes += st->readers[i]->size; - reftable_writer_set_limits(wr, st->readers[first]->min_update_index, - st->readers[last]->max_update_index); + err = reftable_writer_set_limits(wr, st->readers[first]->min_update_index, + st->readers[last]->max_update_index); + if (err < 0) + goto done; err = reftable_merged_table_new(&mt, st->readers + first, subtabs_len, st->opts.hash_id); diff --git a/reftable/writer.c b/reftable/writer.c index 0b88e5583b..0bd834090c 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -165,11 +165,20 @@ int reftable_writer_new(struct reftable_writer **out, return 0; } -void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, - uint64_t max) +int reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, + uint64_t max) { + /* + * The limits should be set before any records are added to the writer. + * Check if any records were added by checking if `last_key` was set. + */ + if (w->last_key.len) + return REFTABLE_API_ERROR; + w->min_update_index = min; w->max_update_index = max; + + return 0; } static void writer_release(struct reftable_writer *w) diff --git a/t/unit-tests/t-reftable-stack.c b/t/unit-tests/t-reftable-stack.c index 52b81475c3..2738f449f0 100644 --- a/t/unit-tests/t-reftable-stack.c +++ b/t/unit-tests/t-reftable-stack.c @@ -98,7 +98,8 @@ static void t_read_file(void) static int write_test_ref(struct reftable_writer *wr, void *arg) { struct reftable_ref_record *ref = arg; - reftable_writer_set_limits(wr, ref->update_index, ref->update_index); + check(!reftable_writer_set_limits(wr, ref->update_index, + ref->update_index)); return reftable_writer_add_ref(wr, ref); } @@ -138,7 +139,8 @@ static int write_test_log(struct reftable_writer *wr, void *arg) { struct write_log_arg *wla = arg; - reftable_writer_set_limits(wr, wla->update_index, wla->update_index); + check(!reftable_writer_set_limits(wr, wla->update_index, + wla->update_index)); return reftable_writer_add_log(wr, wla->log); } @@ -956,7 +958,7 @@ static void t_reflog_expire(void) static int write_nothing(struct reftable_writer *wr, void *arg UNUSED) { - reftable_writer_set_limits(wr, 1, 1); + check(!reftable_writer_set_limits(wr, 1, 1)); return 0; }