From ab6f79d8df7c7799ed38229eb56811fda36853ae Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:27:17 +0800 Subject: refs: set up ref consistency check infrastructure The "struct ref_store" is the base class which contains the "be" pointer which provides backend-specific functions whose interfaces are defined in the "ref_storage_be". We could reuse this polymorphism to define only one interface. For every backend, we need to provide its own function pointer. The interfaces defined in the `ref_storage_be` are carefully structured in semantic. It's organized as the five parts: 1. The name and the initialization interfaces. 2. The ref transaction interfaces. 3. The ref internal interfaces (pack, rename and copy). 4. The ref filesystem interfaces. 5. The reflog related interfaces. To keep consistent with the git-fsck(1), add a new interface named "fsck_refs_fn" to the end of "ref_storage_be". This semantic cannot be grouped into any above five categories. Explicitly add blank line to make it different from others. Last, implement placeholder functions for each ref backends. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- refs/files-backend.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'refs/files-backend.c') diff --git a/refs/files-backend.c b/refs/files-backend.c index aa52d9be7c..4630eb1f80 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3408,6 +3408,15 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store, return ret; } +static int files_fsck(struct ref_store *ref_store, + struct fsck_options *o) +{ + struct files_ref_store *refs = + files_downcast(ref_store, REF_STORE_READ, "fsck"); + + return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o); +} + struct ref_storage_be refs_be_files = { .name = "files", .init = files_ref_store_init, @@ -3434,5 +3443,7 @@ struct ref_storage_be refs_be_files = { .reflog_exists = files_reflog_exists, .create_reflog = files_create_reflog, .delete_reflog = files_delete_reflog, - .reflog_expire = files_reflog_expire + .reflog_expire = files_reflog_expire, + + .fsck = files_fsck, }; -- cgit v1.2.3 From a7600b8481b533d77a2cc5f03acdbed12b996fcd Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:31:31 +0800 Subject: files-backend: add unified interface for refs scanning For refs and reflogs, we need to scan its corresponding directories to check every regular file or symbolic link which shares the same pattern. Introduce a unified interface for scanning directories for files-backend. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/fsck-msgids.txt | 3 ++ fsck.h | 1 + refs/files-backend.c | 73 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 76 insertions(+), 1 deletion(-) (limited to 'refs/files-backend.c') diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt index f643585a34..7c809fddf1 100644 --- a/Documentation/fsck-msgids.txt +++ b/Documentation/fsck-msgids.txt @@ -19,6 +19,9 @@ `badParentSha1`:: (ERROR) A commit object has a bad parent sha1. +`badRefFiletype`:: + (ERROR) A ref has a bad file type. + `badTagName`:: (INFO) A tag has an invalid format. diff --git a/fsck.h b/fsck.h index d551a9fe86..af02174973 100644 --- a/fsck.h +++ b/fsck.h @@ -31,6 +31,7 @@ enum fsck_msg_type { FUNC(BAD_NAME, ERROR) \ FUNC(BAD_OBJECT_SHA1, ERROR) \ FUNC(BAD_PARENT_SHA1, ERROR) \ + FUNC(BAD_REF_FILETYPE, ERROR) \ FUNC(BAD_TIMEZONE, ERROR) \ FUNC(BAD_TREE, ERROR) \ FUNC(BAD_TREE_SHA1, ERROR) \ diff --git a/refs/files-backend.c b/refs/files-backend.c index 4630eb1f80..e511e1dcce 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -6,6 +6,7 @@ #include "../gettext.h" #include "../hash.h" #include "../hex.h" +#include "../fsck.h" #include "../refs.h" #include "refs-internal.h" #include "ref-cache.h" @@ -3408,13 +3409,83 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store, return ret; } +/* + * For refs and reflogs, they share a unified interface when scanning + * the whole directory. This function is used as the callback for each + * regular file or symlink in the directory. + */ +typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store, + struct fsck_options *o, + const char *refs_check_dir, + struct dir_iterator *iter); + +static int files_fsck_refs_dir(struct ref_store *ref_store, + struct fsck_options *o, + const char *refs_check_dir, + files_fsck_refs_fn *fsck_refs_fn) +{ + struct strbuf sb = STRBUF_INIT; + struct dir_iterator *iter; + int iter_status; + int ret = 0; + + strbuf_addf(&sb, "%s/%s", ref_store->gitdir, refs_check_dir); + + iter = dir_iterator_begin(sb.buf, 0); + if (!iter) { + ret = error_errno(_("cannot open directory %s"), sb.buf); + goto out; + } + + while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) { + if (S_ISDIR(iter->st.st_mode)) { + continue; + } else if (S_ISREG(iter->st.st_mode) || + S_ISLNK(iter->st.st_mode)) { + if (o->verbose) + fprintf_ln(stderr, "Checking %s/%s", + refs_check_dir, iter->relative_path); + for (size_t i = 0; fsck_refs_fn[i]; i++) { + if (fsck_refs_fn[i](ref_store, o, refs_check_dir, iter)) + ret = -1; + } + } else { + struct fsck_ref_report report = { .path = iter->basename }; + if (fsck_report_ref(o, &report, + FSCK_MSG_BAD_REF_FILETYPE, + "unexpected file type")) + ret = -1; + } + } + + if (iter_status != ITER_DONE) + ret = error(_("failed to iterate over '%s'"), sb.buf); + +out: + strbuf_release(&sb); + return ret; +} + +static int files_fsck_refs(struct ref_store *ref_store, + struct fsck_options *o) +{ + files_fsck_refs_fn fsck_refs_fn[]= { + NULL, + }; + + if (o->verbose) + fprintf_ln(stderr, _("Checking references consistency")); + return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn); +} + static int files_fsck(struct ref_store *ref_store, struct fsck_options *o) { struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_READ, "fsck"); - return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o); + return files_fsck_refs(ref_store, o) | + refs->packed_ref_store->be->fsck(refs->packed_ref_store, o); } struct ref_storage_be refs_be_files = { -- cgit v1.2.3 From 1c31be45b3b263670c7d2a91c27cc119b77dd2e2 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Thu, 8 Aug 2024 19:31:42 +0800 Subject: fsck: add ref name check for files backend The git-fsck(1) only implicitly checks the reference, it does not fully check refs with bad format name such as standalone "@". However, a file ending with ".lock" should not be marked as having a bad ref name. It is expected that concurrent writers may have such lock files. We currently ignore this situation. But for bare ".lock" file, we will report it as error. In order to provide such checks, add a new fsck message id "badRefName" with default ERROR type. Use existing "check_refname_format" to explicit check the ref name. And add a new unit test to verify the functionality. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/fsck-msgids.txt | 3 ++ fsck.h | 1 + refs/files-backend.c | 31 +++++++++++++++ t/t0602-reffiles-fsck.sh | 92 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+) create mode 100755 t/t0602-reffiles-fsck.sh (limited to 'refs/files-backend.c') diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt index 7c809fddf1..68a2801f15 100644 --- a/Documentation/fsck-msgids.txt +++ b/Documentation/fsck-msgids.txt @@ -22,6 +22,9 @@ `badRefFiletype`:: (ERROR) A ref has a bad file type. +`badRefName`:: + (ERROR) A ref has an invalid format. + `badTagName`:: (INFO) A tag has an invalid format. diff --git a/fsck.h b/fsck.h index af02174973..500b4c04d2 100644 --- a/fsck.h +++ b/fsck.h @@ -32,6 +32,7 @@ enum fsck_msg_type { FUNC(BAD_OBJECT_SHA1, ERROR) \ FUNC(BAD_PARENT_SHA1, ERROR) \ FUNC(BAD_REF_FILETYPE, ERROR) \ + FUNC(BAD_REF_NAME, ERROR) \ FUNC(BAD_TIMEZONE, ERROR) \ FUNC(BAD_TREE, ERROR) \ FUNC(BAD_TREE_SHA1, ERROR) \ diff --git a/refs/files-backend.c b/refs/files-backend.c index e511e1dcce..7f6eefa960 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3419,6 +3419,36 @@ typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store, const char *refs_check_dir, struct dir_iterator *iter); +static int files_fsck_refs_name(struct ref_store *ref_store UNUSED, + struct fsck_options *o, + const char *refs_check_dir, + struct dir_iterator *iter) +{ + struct strbuf sb = STRBUF_INIT; + int ret = 0; + + /* + * Ignore the files ending with ".lock" as they may be lock files + * However, do not allow bare ".lock" files. + */ + if (iter->basename[0] != '.' && ends_with(iter->basename, ".lock")) + goto cleanup; + + if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) { + struct fsck_ref_report report = { .path = NULL }; + + strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path); + report.path = sb.buf; + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_REF_NAME, + "invalid refname format"); + } + +cleanup: + strbuf_release(&sb); + return ret; +} + static int files_fsck_refs_dir(struct ref_store *ref_store, struct fsck_options *o, const char *refs_check_dir, @@ -3470,6 +3500,7 @@ static int files_fsck_refs(struct ref_store *ref_store, struct fsck_options *o) { files_fsck_refs_fn fsck_refs_fn[]= { + files_fsck_refs_name, NULL, }; diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh new file mode 100755 index 0000000000..71a4d1a5ae --- /dev/null +++ b/t/t0602-reffiles-fsck.sh @@ -0,0 +1,92 @@ +#!/bin/sh + +test_description='Test reffiles backend consistency check' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME +GIT_TEST_DEFAULT_REF_FORMAT=files +export GIT_TEST_DEFAULT_REF_FORMAT +TEST_PASSES_SANITIZE_LEAK=true + +. ./test-lib.sh + +test_expect_success 'ref name should be checked' ' + test_when_finished "rm -rf repo" && + git init repo && + branch_dir_prefix=.git/refs/heads && + tag_dir_prefix=.git/refs/tags && + cd repo && + + git commit --allow-empty -m initial && + git checkout -b branch-1 && + git tag tag-1 && + git commit --allow-empty -m second && + git checkout -b branch-2 && + git tag tag-2 && + git tag multi_hierarchy/tag-2 && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/.branch-1: badRefName: invalid refname format + EOF + rm $branch_dir_prefix/.branch-1 && + test_cmp expect err && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/@: badRefName: invalid refname format + EOF + rm $branch_dir_prefix/@ && + test_cmp expect err && + + cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format + EOF + rm $tag_dir_prefix/multi_hierarchy/@ && + test_cmp expect err && + + cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock && + git refs verify 2>err && + rm $tag_dir_prefix/tag-1.lock && + test_must_be_empty err && + + cp $tag_dir_prefix/tag-1 $tag_dir_prefix/.lock && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/tags/.lock: badRefName: invalid refname format + EOF + rm $tag_dir_prefix/.lock && + test_cmp expect err +' + +test_expect_success 'ref name check should be adapted into fsck messages' ' + test_when_finished "rm -rf repo" && + git init repo && + branch_dir_prefix=.git/refs/heads && + tag_dir_prefix=.git/refs/tags && + cd repo && + git commit --allow-empty -m initial && + git checkout -b branch-1 && + git tag tag-1 && + git commit --allow-empty -m second && + git checkout -b branch-2 && + git tag tag-2 && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 && + git -c fsck.badRefName=warn refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/.branch-1: badRefName: invalid refname format + EOF + rm $branch_dir_prefix/.branch-1 && + test_cmp expect err && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ && + git -c fsck.badRefName=ignore refs verify 2>err && + test_must_be_empty err +' + +test_done -- cgit v1.2.3