From 211fa8b2d016208e1f37727c118a073dd415bed1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 07:54:18 +0200 Subject: refs/files: detect race when generating reflog entry for HEAD When updating a reference that is being pointed to HEAD we don't only write a reflog message for that particular reference, but also generate one for HEAD. This logic is handled by `split_head_update()`, where we: 1. Verify that the condition actually triggered. This is done by reading HEAD at the start of the transaction so that we can then check whether a given reference update refers to its target. 2. Queue a new log-only update for HEAD in case it did. But the logic is unfortunately not free of races, as we do not lock the HEAD reference after we have read its target. This can lead to the following two scenarios: - HEAD gets concurrently updated to point to one of the references we have already processed. This causes us not writing a reflog message even though we should have done so. - HEAD gets concurrently updated to no longer point to a reference anymore that we have already processed. This causes us to write a reflog message even though we should _not_ have done so. Improve the situation by introducing a new `REF_LOG_VIA_SPLIT` flag that is specific to the "files" backend. If set, we will double check that the HEAD reference still points to the reference that we are creating the reflog entry for after we have locked HEAD. Furthermore, instead of manually resolving the old object ID of that entry, we now use the same old state as for the parent update. If we detect such a racy update we abort the transaction. This is a bit heavy-handed: the user didn't even ask us to write a reflog update for "HEAD", so it might be surprising if we abort the transaction. That being said: - Normal users wouldn't typically hit this case as we only hit the relevant code when committing to a branch that is being pointed to by "HEAD" directly. Commands like git-commit(1) typically commit to "HEAD" itself though. - Scripted users that use git-update-ref(1) and related plumbing commands are unlikely to hit this case either, as they would have to update the pointed-to-branch at the same as "HEAD" is being updated, which is an exceedingly rare event. The alternative would be to instead drop the log-only update completely, but that would require more logic that is hard to verify without adding infrastructure specific for such a test. So we rather do the pragmatic thing and don't worry too much about an edge case that is very unlikely to happen. Unfortunately, this change only helps with the second race. We cannot reliably plug the first race without locking the HEAD reference at the start of the transaction. Locking HEAD unconditionally would effectively serialize all writes though, and that doesn't seem like an option. Also, double checking its value at the end of the transaction is not an option either, as its target may have flip-flopped during the transaction. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'refs/files-backend.c') diff --git a/refs/files-backend.c b/refs/files-backend.c index bf6f89b1d1..ba018b0984 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -68,6 +68,12 @@ */ #define REF_DELETED_RMDIR (1 << 9) +/* + * Used to indicate that the reflog-only update has been created via + * `split_head_update()`. + */ +#define REF_LOG_VIA_SPLIT (1 << 14) + struct ref_lock { char *ref_name; struct lock_file lk; @@ -2420,9 +2426,10 @@ static enum ref_transaction_error split_head_update(struct ref_update *update, new_update = ref_transaction_add_update( transaction, "HEAD", - update->flags | REF_LOG_ONLY | REF_NO_DEREF, + update->flags | REF_LOG_ONLY | REF_NO_DEREF | REF_LOG_VIA_SPLIT, &update->new_oid, &update->old_oid, NULL, NULL, update->committer_info, update->msg); + new_update->parent_update = update; /* * Add "HEAD". This insertion is O(N) in the transaction @@ -2600,7 +2607,36 @@ static enum ref_transaction_error lock_ref_for_update(struct files_ref_store *re update->backend_data = lock; - if (update->type & REF_ISSYMREF) { + if (update->flags & REF_LOG_VIA_SPLIT) { + struct ref_lock *parent_lock; + + if (!update->parent_update) + BUG("split update without a parent"); + + parent_lock = update->parent_update->backend_data; + + /* + * Check that "HEAD" didn't racily change since we have looked + * it up. If it did we must refuse to write the reflog entry. + * + * Note that this does not catch all races: if "HEAD" was + * racily changed to point to one of the refs part of the + * transaction then we would miss writing the split reflog + * entry for "HEAD". + */ + if (!(update->type & REF_ISSYMREF) || + strcmp(update->parent_update->refname, referent.buf)) { + strbuf_addstr(err, "HEAD has been racily updated"); + ret = REF_TRANSACTION_ERROR_GENERIC; + goto out; + } + + if (update->flags & REF_HAVE_OLD) { + oidcpy(&lock->old_oid, &update->old_oid); + } else { + oidcpy(&lock->old_oid, &parent_lock->old_oid); + } + } else if (update->type & REF_ISSYMREF) { if (update->flags & REF_NO_DEREF) { /* * We won't be reading the referent as part of -- cgit v1.2.3 From 046c67325c66f9e29e3bbe249ee742477b7525ef Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 07:54:19 +0200 Subject: refs: stop unsetting REF_HAVE_OLD for log-only updates The `REF_HAVE_OLD` flag indicates whether a given ref update has its old object ID set. If so, the value of that field is used to verify whether the current state of the reference matches this expected state. It is thus an important part of mitigating races with a concurrent process that updates the same set of references. When writing reflogs though we explicitly unset that flag. This is a sensible thing to do: the old state of reflog entry updates may not necessarily match the current on-disk state of its accompanying ref, but it's only intended to signal what old object ID we want to write into the new reflog entry. For example when migrating refs we end up writing many reflog entries for a single reference, and most likely those reflog entries will have many different old object IDs. But unsetting this flag also removes a useful signal, namely that the caller _did_ provide an old object ID for a given reflog entry. This signal will become useful in a subsequent commit, where we add a new flag that tells the transaction to use the provided old and new object IDs to write a reflog entry. The `REF_HAVE_OLD` flag is then used as a signal to verify that the caller really did provide an old object ID. Stop unsetting the flag so that we can use it as this described signal in a subsequent commit. Skip checking the old object ID for log-only updates so that we don't expect it to match the current on-disk state. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 8 +++----- refs/files-backend.c | 9 +++++---- refs/refs-internal.h | 3 ++- refs/reftable-backend.c | 12 +++--------- 4 files changed, 13 insertions(+), 19 deletions(-) (limited to 'refs/files-backend.c') diff --git a/refs.c b/refs.c index a5f9ffaa45..f88928de74 100644 --- a/refs.c +++ b/refs.c @@ -1393,11 +1393,6 @@ int ref_transaction_update_reflog(struct ref_transaction *transaction, update = ref_transaction_add_update(transaction, refname, flags, new_oid, old_oid, NULL, NULL, committer_info, msg); - /* - * While we do set the old_oid value, we unset the flag to skip - * old_oid verification which only makes sense for refs. - */ - update->flags &= ~REF_HAVE_OLD; update->index = index; /* @@ -3318,6 +3313,9 @@ done: int ref_update_expects_existing_old_ref(struct ref_update *update) { + if (update->flags & REF_LOG_ONLY) + return 0; + return (update->flags & REF_HAVE_OLD) && (!is_null_oid(&update->old_oid) || update->old_target); } diff --git a/refs/files-backend.c b/refs/files-backend.c index ba018b0984..85ab2ef2b9 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -2500,7 +2500,6 @@ static enum ref_transaction_error split_symref_update(struct ref_update *update, * done when new_update is processed. */ update->flags |= REF_LOG_ONLY | REF_NO_DEREF; - update->flags &= ~REF_HAVE_OLD; return 0; } @@ -2515,8 +2514,9 @@ static enum ref_transaction_error check_old_oid(struct ref_update *update, struct object_id *oid, struct strbuf *err) { - if (!(update->flags & REF_HAVE_OLD) || - oideq(oid, &update->old_oid)) + if (update->flags & REF_LOG_ONLY || + !(update->flags & REF_HAVE_OLD) || + oideq(oid, &update->old_oid)) return 0; if (is_null_oid(&update->old_oid)) { @@ -3095,7 +3095,8 @@ static int files_transaction_finish_initial(struct files_ref_store *refs, for (i = 0; i < transaction->nr; i++) { struct ref_update *update = transaction->updates[i]; - if ((update->flags & REF_HAVE_OLD) && + if (!(update->flags & REF_LOG_ONLY) && + (update->flags & REF_HAVE_OLD) && !is_null_oid(&update->old_oid)) BUG("initial ref transaction with old_sha1 set"); diff --git a/refs/refs-internal.h b/refs/refs-internal.h index f868870851..95a4dc3902 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -802,7 +802,8 @@ enum ref_transaction_error ref_update_check_old_target(const char *referent, /* * Check if the ref must exist, this means that the old_oid or - * old_target is non NULL. + * old_target is non NULL. Log-only updates never require the old state to + * match. */ int ref_update_expects_existing_old_ref(struct ref_update *update); diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 4c3817f4ec..44af58ac50 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1180,8 +1180,6 @@ static enum ref_transaction_error prepare_single_update(struct reftable_ref_stor if (ret > 0) { /* The reference does not exist, but we expected it to. */ strbuf_addf(err, _("cannot lock ref '%s': " - - "unable to resolve reference '%s'"), ref_update_original_update_refname(u), u->refname); return REF_TRANSACTION_ERROR_NONEXISTENT_REF; @@ -1235,13 +1233,8 @@ static enum ref_transaction_error prepare_single_update(struct reftable_ref_stor new_update->parent_update = u; - /* - * Change the symbolic ref update to log only. Also, it - * doesn't need to check its old OID value, as that will be - * done when new_update is processed. - */ + /* Change the symbolic ref update to log only. */ u->flags |= REF_LOG_ONLY | REF_NO_DEREF; - u->flags &= ~REF_HAVE_OLD; } } @@ -1265,7 +1258,8 @@ static enum ref_transaction_error prepare_single_update(struct reftable_ref_stor ret = ref_update_check_old_target(referent->buf, u, err); if (ret) return ret; - } else if ((u->flags & REF_HAVE_OLD) && !oideq(¤t_oid, &u->old_oid)) { + } else if ((u->flags & (REF_LOG_ONLY | REF_HAVE_OLD)) == REF_HAVE_OLD && + !oideq(¤t_oid, &u->old_oid)) { if (is_null_oid(&u->old_oid)) { strbuf_addf(err, _("cannot lock ref '%s': " "reference already exists"), -- cgit v1.2.3 From 465eff81de44763832e96fbe609ec269f1c23ade Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 6 Aug 2025 07:54:20 +0200 Subject: refs: fix invalid old object IDs when migrating reflogs When migrating reflog entries between different storage formats we end up with invalid old object IDs for the migrated entries: instead of writing the old object ID of the to-be-migrated entry, we end up with the all-zeroes object ID. The root cause of this issue is that we don't know to use the old object ID provided by the caller. Instead, we manually resolve the old object ID by resolving the current value of its matching reference. But as that reference does not yet exist in the target ref storage we always end up resolving it to all-zeroes. This issue got unnoticed as there is no user-facing command that would even show the old object ID. While `git log -g` knows to show the new object ID, we don't have any formatting directive to show the old object ID. Fix the bug by introducing a new flag `REF_LOG_USE_PROVIDED_OIDS`. If set, backends are instructed to use the old and new object IDs provided by the caller, without doing any manual resolving. Set this flag in `ref_transaction_update_reflog()`. Amend our tests in t1460-refs-migrate to use our test tool to read reflog entries. This test tool prints out both old and new object ID of each reflog entry, which fixes the test gap. Furthermore it also prints the full identity used to write the reflog, which provides test coverage for the previous commit in this patch series that fixed the identity for migrated reflogs. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 3 ++- refs.h | 9 ++++++++- refs/files-backend.c | 16 +++++++++++++++- refs/reftable-backend.c | 14 ++++++++++++++ t/t1421-reflog-write.sh | 4 +--- t/t1460-refs-migrate.sh | 22 +++++++++++++++------- 6 files changed, 55 insertions(+), 13 deletions(-) (limited to 'refs/files-backend.c') diff --git a/refs.c b/refs.c index f88928de74..946eb48941 100644 --- a/refs.c +++ b/refs.c @@ -1385,7 +1385,8 @@ int ref_transaction_update_reflog(struct ref_transaction *transaction, assert(err); - flags = REF_HAVE_OLD | REF_HAVE_NEW | REF_LOG_ONLY | REF_FORCE_CREATE_REFLOG | REF_NO_DEREF; + flags = REF_HAVE_OLD | REF_HAVE_NEW | REF_LOG_ONLY | REF_FORCE_CREATE_REFLOG | REF_NO_DEREF | + REF_LOG_USE_PROVIDED_OIDS; if (!transaction_refname_valid(refname, new_oid, flags, err)) return -1; diff --git a/refs.h b/refs.h index 253dd8f4d5..090b4fdff4 100644 --- a/refs.h +++ b/refs.h @@ -760,13 +760,20 @@ struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs, */ #define REF_SKIP_CREATE_REFLOG (1 << 12) +/* + * When writing a REF_LOG_ONLY record, use the old and new object IDs provided + * in the update instead of resolving the old object ID. The caller must also + * set both REF_HAVE_OLD and REF_HAVE_NEW. + */ +#define REF_LOG_USE_PROVIDED_OIDS (1 << 13) + /* * Bitmask of all of the flags that are allowed to be passed in to * ref_transaction_update() and friends: */ #define REF_TRANSACTION_UPDATE_ALLOWED_FLAGS \ (REF_NO_DEREF | REF_FORCE_CREATE_REFLOG | REF_SKIP_OID_VERIFICATION | \ - REF_SKIP_REFNAME_VERIFICATION | REF_SKIP_CREATE_REFLOG) + REF_SKIP_REFNAME_VERIFICATION | REF_SKIP_CREATE_REFLOG | REF_LOG_USE_PROVIDED_OIDS) /* * Add a reference update to transaction. `new_oid` is the value that diff --git a/refs/files-backend.c b/refs/files-backend.c index 85ab2ef2b9..905555365b 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3010,6 +3010,20 @@ static int parse_and_write_reflog(struct files_ref_store *refs, struct ref_lock *lock, struct strbuf *err) { + struct object_id *old_oid = &lock->old_oid; + + if (update->flags & REF_LOG_USE_PROVIDED_OIDS) { + if (!(update->flags & REF_HAVE_OLD) || + !(update->flags & REF_HAVE_NEW) || + !(update->flags & REF_LOG_ONLY)) { + strbuf_addf(err, _("trying to write reflog for '%s'" + "with incomplete values"), update->refname); + return REF_TRANSACTION_ERROR_GENERIC; + } + + old_oid = &update->old_oid; + } + if (update->new_target) { /* * We want to get the resolved OID for the target, to ensure @@ -3027,7 +3041,7 @@ static int parse_and_write_reflog(struct files_ref_store *refs, } } - if (files_log_ref_write(refs, lock->ref_name, &lock->old_oid, + if (files_log_ref_write(refs, lock->ref_name, old_oid, &update->new_oid, update->committer_info, update->msg, update->flags, err)) { char *old_msg = strbuf_detach(err, NULL); diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 44af58ac50..99fafd75eb 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1096,6 +1096,20 @@ static enum ref_transaction_error prepare_single_update(struct reftable_ref_stor if (ret) return REF_TRANSACTION_ERROR_GENERIC; + if (u->flags & REF_LOG_USE_PROVIDED_OIDS) { + if (!(u->flags & REF_HAVE_OLD) || + !(u->flags & REF_HAVE_NEW) || + !(u->flags & REF_LOG_ONLY)) { + strbuf_addf(err, _("trying to write reflog for '%s'" + "with incomplete values"), u->refname); + return REF_TRANSACTION_ERROR_GENERIC; + } + + if (queue_transaction_update(refs, tx_data, u, &u->old_oid, err)) + return REF_TRANSACTION_ERROR_GENERIC; + return 0; + } + /* Verify that the new object ID is valid. */ if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) && !(u->flags & REF_SKIP_OID_VERIFICATION) && diff --git a/t/t1421-reflog-write.sh b/t/t1421-reflog-write.sh index dd7ffa5241..46df64c176 100755 --- a/t/t1421-reflog-write.sh +++ b/t/t1421-reflog-write.sh @@ -101,11 +101,9 @@ test_expect_success 'simple writes' ' EOF git reflog write refs/heads/something $COMMIT_OID $COMMIT_OID second && - # Note: the old object ID of the second reflog entry is broken. - # This will be fixed in subsequent commits. test_reflog_matches . refs/heads/something <<-EOF $ZERO_OID $COMMIT_OID $SIGNATURE first - $ZERO_OID $COMMIT_OID $SIGNATURE second + $COMMIT_OID $COMMIT_OID $SIGNATURE second EOF ) ' diff --git a/t/t1460-refs-migrate.sh b/t/t1460-refs-migrate.sh index 2ab97e1b7d..0e1116a319 100755 --- a/t/t1460-refs-migrate.sh +++ b/t/t1460-refs-migrate.sh @@ -7,6 +7,17 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +print_all_reflog_entries () { + repo=$1 && + test-tool -C "$repo" ref-store main for-each-reflog >reflogs && + while read reflog + do + echo "REFLOG: $reflog" && + test-tool -C "$repo" ref-store main for-each-reflog-ent "$reflog" || + return 1 + done [ []] @@ -28,8 +39,7 @@ test_migration () { --format='%(refname) %(objectname) %(symref)' >expect && if ! $skip_reflog_verify then - git -C "$repo" reflog --all >expect_logs && - git -C "$repo" reflog list >expect_log_list + print_all_reflog_entries "$repo" >expect_logs fi && git -C "$repo" refs migrate --ref-format="$format" "$@" && @@ -39,10 +49,8 @@ test_migration () { test_cmp expect actual && if ! $skip_reflog_verify then - git -C "$repo" reflog --all >actual_logs && - git -C "$repo" reflog list >actual_log_list && - test_cmp expect_logs actual_logs && - test_cmp expect_log_list actual_log_list + print_all_reflog_entries "$repo" >actual_logs && + test_cmp expect_logs actual_logs fi && git -C "$repo" rev-parse --show-ref-format >actual && @@ -273,7 +281,7 @@ test_expect_success 'multiple reftable blocks with multiple entries' ' test_commit -C repo second && printf "update refs/heads/ref-%d HEAD\n" $(test_seq 3000) >stdin && git -C repo update-ref --stdin