From 98037f2bf2fb82845179f3311d8f64f7e27d77c4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 23 Jun 2020 17:47:00 +0000 Subject: commit-graph: place bloom_settings in context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Place an instance of struct bloom_settings into the struct write_commit_graph_context. This allows simplifying the function prototype of write_graph_chunk_bloom_data(). This will allow us to combine the function prototypes and use function pointers to simplify write_commit_graph_file(). By using a pointer, we can later replace the settings to match those that exist in the current commit-graph, in case a future Git version allows customization of these parameters. Reported-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 887837e882..d0fedcd9b1 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -882,6 +882,7 @@ struct write_commit_graph_context { const struct split_commit_graph_opts *split_opts; size_t total_bloom_filter_data_size; + const struct bloom_filter_settings *bloom_settings; }; static void write_graph_chunk_fanout(struct hashfile *f, @@ -1103,8 +1104,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f, } static void write_graph_chunk_bloom_data(struct hashfile *f, - struct write_commit_graph_context *ctx, - const struct bloom_filter_settings *settings) + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1116,9 +1116,9 @@ static void write_graph_chunk_bloom_data(struct hashfile *f, _("Writing changed paths Bloom filters data"), ctx->commits.nr); - hashwrite_be32(f, settings->hash_version); - hashwrite_be32(f, settings->num_hashes); - hashwrite_be32(f, settings->bits_per_entry); + hashwrite_be32(f, ctx->bloom_settings->hash_version); + hashwrite_be32(f, ctx->bloom_settings->num_hashes); + hashwrite_be32(f, ctx->bloom_settings->bits_per_entry); while (list < last) { struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); @@ -1541,6 +1541,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) struct object_id file_hash; const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; + ctx->bloom_settings = &bloom_settings; + if (ctx->split) { struct strbuf tmp_file = STRBUF_INIT; @@ -1642,7 +1644,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) write_graph_chunk_extra_edges(f, ctx); if (ctx->changed_paths) { write_graph_chunk_bloom_indexes(f, ctx); - write_graph_chunk_bloom_data(f, ctx, &bloom_settings); + write_graph_chunk_bloom_data(f, ctx); } if (ctx->num_commit_graphs_after > 1 && write_graph_chunk_base(f, ctx)) { -- cgit v1.2.3 From 7b671f8c2b6b7de511fd2f6587e4540c25764b61 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 23 Jun 2020 17:47:01 +0000 Subject: commit-graph: change test to die on parse, not load 43d3561 (commit-graph write: don't die if the existing graph is corrupt, 2019-03-25) introduced the GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD environment variable. This was created to verify that commit-graph was not loaded when writing a new non-incremental commit-graph. An upcoming change wants to load a commit-graph in some valuable cases, but we want to maintain that we don't trust the commit-graph data when writing our new file. Instead of dying on load, instead die if we ever try to parse a commit from the commit-graph. This functionally verifies the same intended behavior, but allows a more advanced feature in the next change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 12 ++++++++---- commit-graph.h | 2 +- t/t5318-commit-graph.sh | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index d0fedcd9b1..6a28d4a5a6 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -564,10 +564,6 @@ static int prepare_commit_graph(struct repository *r) return !!r->objects->commit_graph; r->objects->commit_graph_attempted = 1; - if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0)) - die("dying as requested by the '%s' variable on commit-graph load!", - GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD); - prepare_repo_settings(r); if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && @@ -790,6 +786,14 @@ static int parse_commit_in_graph_one(struct repository *r, int parse_commit_in_graph(struct repository *r, struct commit *item) { + static int checked_env = 0; + + if (!checked_env && + git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0)) + die("dying as requested by the '%s' variable on commit-graph parse!", + GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE); + checked_env = 1; + if (!prepare_commit_graph(r)) return 0; return parse_commit_in_graph_one(r, r->objects->commit_graph, item); diff --git a/commit-graph.h b/commit-graph.h index 881c9b46e5..f0fb13e3f2 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -5,7 +5,7 @@ #include "object-store.h" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" -#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" +#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE" #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS" /* diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 1073f9e3cf..5ec01abdaa 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -436,7 +436,7 @@ corrupt_graph_verify() { cp $objdir/info/commit-graph commit-graph-pre-write-test fi && git status --short && - GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write && + GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write && git commit-graph verify } -- cgit v1.2.3 From 949197420e3da13c06c6a15fd4b4ed3120753c42 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 1 Jul 2020 13:27:23 +0000 Subject: bloom: fix logic in get_bloom_filter() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get_bloom_filter() method is a bit complicated in some parts where it does not need to be. In particular, it needs to return a NULL filter only when compute_if_not_present is zero AND the filter data cannot be loaded from a commit-graph file. This currently happens by accident because the commit-graph does not load changed-path Bloom filters from an existing commit-graph when writing a new one. This will change in a later patch. Also clean up some style issues while we are here. One side-effect of returning a NULL filter is that the filters that are reported as "too large" will now be reported as NULL insead of length zero. This case was not properly covered before, so add a test. Further, remote the counting of the zero-length filters from revision.c and the trace2 logs. Helped-by: René Scharfe Helped-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bloom.c | 14 ++++++-------- commit-graph.c | 8 ++++++-- revision.c | 7 ------- t/t4216-log-bloom.sh | 24 ++++++++++++++++++++++-- 4 files changed, 34 insertions(+), 19 deletions(-) (limited to 'commit-graph.c') diff --git a/bloom.c b/bloom.c index c38d1cff0c..2af5389795 100644 --- a/bloom.c +++ b/bloom.c @@ -186,7 +186,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r, struct diff_options diffopt; int max_changes = 512; - if (bloom_filters.slab_size == 0) + if (!bloom_filters.slab_size) return NULL; filter = bloom_filter_slab_at(&bloom_filters, c); @@ -194,16 +194,14 @@ struct bloom_filter *get_bloom_filter(struct repository *r, if (!filter->data) { load_commit_graph_info(r, c); if (c->graph_pos != COMMIT_NOT_FROM_GRAPH && - r->objects->commit_graph->chunk_bloom_indexes) { - if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c)) - return filter; - else - return NULL; - } + r->objects->commit_graph->chunk_bloom_indexes) + load_bloom_filter_from_graph(r->objects->commit_graph, filter, c); } - if (filter->data || !compute_if_not_present) + if (filter->data) return filter; + if (!compute_if_not_present) + return NULL; repo_diff_setup(r, &diffopt); diffopt.flags.recursive = 1; diff --git a/commit-graph.c b/commit-graph.c index 6a28d4a5a6..50ce039a53 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1098,7 +1098,8 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f, while (list < last) { struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); - cur_pos += filter->len; + size_t len = filter ? filter->len : 0; + cur_pos += len; display_progress(progress, ++i); hashwrite_be32(f, cur_pos); list++; @@ -1126,8 +1127,11 @@ static void write_graph_chunk_bloom_data(struct hashfile *f, while (list < last) { struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); + size_t len = filter ? filter->len : 0; display_progress(progress, ++i); - hashwrite(f, filter->data, filter->len * sizeof(unsigned char)); + + if (len) + hashwrite(f, filter->data, len * sizeof(unsigned char)); list++; } diff --git a/revision.c b/revision.c index c644c66091..7339750af1 100644 --- a/revision.c +++ b/revision.c @@ -633,7 +633,6 @@ static unsigned int count_bloom_filter_maybe; static unsigned int count_bloom_filter_definitely_not; static unsigned int count_bloom_filter_false_positive; static unsigned int count_bloom_filter_not_present; -static unsigned int count_bloom_filter_length_zero; static void trace2_bloom_filter_statistics_atexit(void) { @@ -641,7 +640,6 @@ static void trace2_bloom_filter_statistics_atexit(void) jw_object_begin(&jw, 0); jw_object_intmax(&jw, "filter_not_present", count_bloom_filter_not_present); - jw_object_intmax(&jw, "zero_length_filter", count_bloom_filter_length_zero); jw_object_intmax(&jw, "maybe", count_bloom_filter_maybe); jw_object_intmax(&jw, "definitely_not", count_bloom_filter_definitely_not); jw_object_intmax(&jw, "false_positive", count_bloom_filter_false_positive); @@ -735,11 +733,6 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, return -1; } - if (!filter->len) { - count_bloom_filter_length_zero++; - return -1; - } - result = bloom_filter_contains(filter, revs->bloom_key, revs->bloom_filter_settings); diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index c7011f33e2..2761208e74 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -60,7 +60,7 @@ setup () { test_bloom_filters_used () { log_args=$1 - bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"zero_length_filter\":0,\"maybe\"" + bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"maybe\"" setup "$log_args" && grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" && test_cmp log_wo_bloom log_w_bloom && @@ -142,7 +142,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' ' test_bloom_filters_used_when_some_filters_are_missing () { log_args=$1 - bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6" + bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":8,\"definitely_not\":6" setup "$log_args" && grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" && test_cmp log_wo_bloom log_w_bloom @@ -152,4 +152,24 @@ test_expect_success 'Use Bloom filters if they exist in the latest but not all c test_bloom_filters_used_when_some_filters_are_missing "-- A/B" ' +test_expect_success 'correctly report changes over limit' ' + git init 513changes && + ( + cd 513changes && + for i in $(test_seq 1 513) + do + echo $i >file$i.txt || return 1 + done && + git add . && + git commit -m "files" && + git commit-graph write --reachable --changed-paths && + for i in $(test_seq 1 513) + do + git -c core.commitGraph=false log -- file$i.txt >expect && + git log -- file$i.txt >actual && + test_cmp expect actual || return 1 + done + ) +' + test_done \ No newline at end of file -- cgit v1.2.3 From 0087a87ba8fc69b27dde0183ec24ade367a4aa5b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 1 Jul 2020 13:27:24 +0000 Subject: commit-graph: persist existence of changed-paths The changed-path Bloom filters were released in v2.27.0, but have a significant drawback. A user can opt-in to writing the changed-path filters using the "--changed-paths" option to "git commit-graph write" but the next write will drop the filters unless that option is specified. This becomes even more important when considering the interaction with gc.writeCommitGraph (on by default) or fetch.writeCommitGraph (part of features.experimental). These config options trigger commit-graph writes that the user did not signal, and hence there is no --changed-paths option available. Allow a user that opts-in to the changed-path filters to persist the property of "my commit-graph has changed-path filters" automatically. A user can drop filters using the --no-changed-paths option. In the process, we need to be extremely careful to match the Bloom filter settings as specified by the commit-graph. This will allow future versions of Git to customize these settings, and the version with this change will persist those settings as commit-graphs are rewritten on top. Use the trace2 API to signal the settings used during the write, and check that output in a test after manually adjusting the correct bytes in the commit-graph file. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 5 ++++- builtin/commit-graph.c | 5 ++++- commit-graph.c | 45 +++++++++++++++++++++++++++++++++++--- commit-graph.h | 1 + t/t4216-log-bloom.sh | 17 +++++++++++++- 5 files changed, 67 insertions(+), 6 deletions(-) (limited to 'commit-graph.c') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index f4b13c005b..369b222b08 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -60,7 +60,10 @@ existing commit-graph file. With the `--changed-paths` option, compute and write information about the paths changed between a commit and it's first parent. This operation can take a while on large repositories. It provides significant performance gains -for getting history of a directory or a file with `git log -- `. +for getting history of a directory or a file with `git log -- `. If +this option is given, future commit-graph writes will automatically assume +that this option was intended. Use `--no-changed-paths` to stop storing this +data. + With the `--split` option, write the commit-graph as a chain of multiple commit-graph files stored in `/info/commit-graphs`. The new commits diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 59009837dc..ff7b177c33 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -151,6 +151,7 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); + opts.enable_changed_paths = -1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -171,7 +172,9 @@ static int graph_write(int argc, const char **argv) flags |= COMMIT_GRAPH_WRITE_SPLIT; if (opts.progress) flags |= COMMIT_GRAPH_WRITE_PROGRESS; - if (opts.enable_changed_paths || + if (!opts.enable_changed_paths) + flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS; + if (opts.enable_changed_paths == 1 || git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0)) flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS; diff --git a/commit-graph.c b/commit-graph.c index 50ce039a53..6762704324 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -16,6 +16,8 @@ #include "progress.h" #include "bloom.h" #include "commit-slab.h" +#include "json-writer.h" +#include "trace2.h" void git_test_write_commit_graph_or_die(void) { @@ -1108,6 +1110,21 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f, stop_progress(&progress); } +static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx) +{ + struct json_writer jw = JSON_WRITER_INIT; + + jw_object_begin(&jw, 0); + jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version); + jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes); + jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry); + jw_end(&jw); + + trace2_data_json("bloom", ctx->r, "settings", &jw); + + jw_release(&jw); +} + static void write_graph_chunk_bloom_data(struct hashfile *f, struct write_commit_graph_context *ctx) { @@ -1116,6 +1133,8 @@ static void write_graph_chunk_bloom_data(struct hashfile *f, struct progress *progress = NULL; int i = 0; + trace2_bloom_filter_settings(ctx); + if (ctx->report_progress) progress = start_delayed_progress( _("Writing changed paths Bloom filters data"), @@ -1547,9 +1566,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) int num_chunks = 3; uint64_t chunk_offset; struct object_id file_hash; - const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; + struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; - ctx->bloom_settings = &bloom_settings; + if (!ctx->bloom_settings) { + bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY", + bloom_settings.bits_per_entry); + bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES", + bloom_settings.num_hashes); + ctx->bloom_settings = &bloom_settings; + } if (ctx->split) { struct strbuf tmp_file = STRBUF_INIT; @@ -1974,9 +1999,23 @@ int write_commit_graph(struct object_directory *odb, ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0; ctx->split_opts = split_opts; - ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0; ctx->total_bloom_filter_data_size = 0; + if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) + ctx->changed_paths = 1; + if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { + struct commit_graph *g; + prepare_commit_graph_one(ctx->r, ctx->odb); + + g = ctx->r->objects->commit_graph; + + /* We have changed-paths already. Keep them in the next graph */ + if (g && g->chunk_bloom_data) { + ctx->changed_paths = 1; + ctx->bloom_settings = g->bloom_filter_settings; + } + } + if (ctx->split) { struct commit_graph *g; prepare_commit_graph(ctx->r); diff --git a/commit-graph.h b/commit-graph.h index f0fb13e3f2..45b1e5bca3 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -96,6 +96,7 @@ enum commit_graph_write_flags { /* Make sure that each OID in the input is a valid commit OID. */ COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3), COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4), + COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 5), }; struct split_commit_graph_opts { diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index 2761208e74..52ad998f9e 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -126,7 +126,7 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters test_commit c14 A/anotherFile2 && test_commit c15 A/B/anotherFile2 && test_commit c16 A/B/C/anotherFile2 && - GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split && + git commit-graph write --reachable --split --no-changed-paths && test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain ' @@ -152,6 +152,21 @@ test_expect_success 'Use Bloom filters if they exist in the latest but not all c test_bloom_filters_used_when_some_filters_are_missing "-- A/B" ' +test_expect_success 'persist filter settings' ' + test_when_finished rm -rf .git/objects/info/commit-graph* && + rm -rf .git/objects/info/commit-graph* && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + GIT_TRACE2_EVENT_NESTING=5 \ + GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \ + GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \ + git commit-graph write --reachable --changed-paths && + grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2.txt && + GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \ + GIT_TRACE2_EVENT_NESTING=5 \ + git commit-graph write --reachable --changed-paths && + grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2-auto.txt +' + test_expect_success 'correctly report changes over limit' ' git init 513changes && ( -- cgit v1.2.3 From 9bab081dfab8ebb4fcdf0d7e8a692626aeb29358 Mon Sep 17 00:00:00 2001 From: SZEDER Gábor Date: Wed, 1 Jul 2020 13:27:25 +0000 Subject: commit-graph: unify the signatures of all write_graph_chunk_*() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the write_graph_chunk_*() helper functions to have the same signature: - Return an int error code from all these functions. write_graph_chunk_base() already has an int error code, now the others will have one, too, but since they don't indicate any error, they will always return 0. - Drop the hash size parameter of write_graph_chunk_oids() and write_graph_chunk_data(); its value can be read directly from 'the_hash_algo' inside these functions as well. This opens up the possibility for further cleanups and foolproofing in the following two patches. Helped-by: René Scharfe Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 6762704324..1a6d26f864 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -891,8 +891,8 @@ struct write_commit_graph_context { const struct bloom_filter_settings *bloom_settings; }; -static void write_graph_chunk_fanout(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_fanout(struct hashfile *f, + struct write_commit_graph_context *ctx) { int i, count = 0; struct commit **list = ctx->commits.list; @@ -913,17 +913,21 @@ static void write_graph_chunk_fanout(struct hashfile *f, hashwrite_be32(f, count); } + + return 0; } -static void write_graph_chunk_oids(struct hashfile *f, int hash_len, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_oids(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; int count; for (count = 0; count < ctx->commits.nr; count++, list++) { display_progress(ctx->progress, ++ctx->progress_cnt); - hashwrite(f, (*list)->object.oid.hash, (int)hash_len); + hashwrite(f, (*list)->object.oid.hash, the_hash_algo->rawsz); } + + return 0; } static const unsigned char *commit_to_sha1(size_t index, void *table) @@ -932,8 +936,8 @@ static const unsigned char *commit_to_sha1(size_t index, void *table) return commits[index]->object.oid.hash; } -static void write_graph_chunk_data(struct hashfile *f, int hash_len, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_data(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -950,7 +954,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, die(_("unable to parse commit %s"), oid_to_hex(&(*list)->object.oid)); tree = get_commit_tree_oid(*list); - hashwrite(f, tree->hash, hash_len); + hashwrite(f, tree->hash, the_hash_algo->rawsz); parent = (*list)->parents; @@ -1030,10 +1034,12 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, list++; } + + return 0; } -static void write_graph_chunk_extra_edges(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_extra_edges(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1082,10 +1088,12 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, list++; } + + return 0; } -static void write_graph_chunk_bloom_indexes(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_bloom_indexes(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1108,6 +1116,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f, } stop_progress(&progress); + return 0; } static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx) @@ -1125,8 +1134,8 @@ static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx) jw_release(&jw); } -static void write_graph_chunk_bloom_data(struct hashfile *f, - struct write_commit_graph_context *ctx) +static int write_graph_chunk_bloom_data(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; @@ -1155,6 +1164,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f, } stop_progress(&progress); + return 0; } static int oid_compare(const void *_a, const void *_b) @@ -1671,8 +1681,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) num_chunks * ctx->commits.nr); } write_graph_chunk_fanout(f, ctx); - write_graph_chunk_oids(f, hashsz, ctx); - write_graph_chunk_data(f, hashsz, ctx); + write_graph_chunk_oids(f, ctx); + write_graph_chunk_data(f, ctx); if (ctx->num_extra_edges) write_graph_chunk_extra_edges(f, ctx); if (ctx->changed_paths) { -- cgit v1.2.3 From 17e6275fc9f5763fb89a1219aceaf14adf340a2b Mon Sep 17 00:00:00 2001 From: SZEDER Gábor Date: Wed, 1 Jul 2020 13:27:26 +0000 Subject: commit-graph: simplify chunk writes into loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In write_commit_graph_file() we now have one block of code filling the array of 'struct chunk_info' with the IDs and sizes of chunks to be written, and an other block of code calling the functions responsible for writing individual chunks. In case of optional chunks like Extra Edge List an Base Graphs List there is also a condition checking whether that chunk is necessary/desired, and that same condition is repeated in both blocks of code. Other, newer chunks have similar optional conditions. Eliminate these repeated conditions by storing the function pointers responsible for writing individual chunks in the 'struct chunk_info' array as well, and calling them in a loop to write the commit-graph file. This will open up the possibility for a bit of foolproofing in the following patch. Helped-by: René Scharfe Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 1a6d26f864..2b26a9dad3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1559,9 +1559,13 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } +typedef int (*chunk_write_fn)(struct hashfile *f, + struct write_commit_graph_context *ctx); + struct chunk_info { uint32_t id; uint64_t size; + chunk_write_fn write_fn; }; static int write_commit_graph_file(struct write_commit_graph_context *ctx) @@ -1624,27 +1628,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunks[0].id = GRAPH_CHUNKID_OIDFANOUT; chunks[0].size = GRAPH_FANOUT_SIZE; + chunks[0].write_fn = write_graph_chunk_fanout; chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP; chunks[1].size = hashsz * ctx->commits.nr; + chunks[1].write_fn = write_graph_chunk_oids; chunks[2].id = GRAPH_CHUNKID_DATA; chunks[2].size = (hashsz + 16) * ctx->commits.nr; + chunks[2].write_fn = write_graph_chunk_data; if (ctx->num_extra_edges) { chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES; chunks[num_chunks].size = 4 * ctx->num_extra_edges; + chunks[num_chunks].write_fn = write_graph_chunk_extra_edges; num_chunks++; } if (ctx->changed_paths) { chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES; chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr; + chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes; num_chunks++; chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA; chunks[num_chunks].size = sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size; + chunks[num_chunks].write_fn = write_graph_chunk_bloom_data; num_chunks++; } if (ctx->num_commit_graphs_after > 1) { chunks[num_chunks].id = GRAPH_CHUNKID_BASE; chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1); + chunks[num_chunks].write_fn = write_graph_chunk_base; num_chunks++; } @@ -1680,19 +1691,12 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) progress_title.buf, num_chunks * ctx->commits.nr); } - write_graph_chunk_fanout(f, ctx); - write_graph_chunk_oids(f, ctx); - write_graph_chunk_data(f, ctx); - if (ctx->num_extra_edges) - write_graph_chunk_extra_edges(f, ctx); - if (ctx->changed_paths) { - write_graph_chunk_bloom_indexes(f, ctx); - write_graph_chunk_bloom_data(f, ctx); - } - if (ctx->num_commit_graphs_after > 1 && - write_graph_chunk_base(f, ctx)) { - return -1; + + for (i = 0; i < num_chunks; i++) { + if (chunks[i].write_fn(f, ctx)) + return -1; } + stop_progress(&ctx->progress); strbuf_release(&progress_title); -- cgit v1.2.3 From 2dd4fed92715fddcefc0223b79e51af2798e921e Mon Sep 17 00:00:00 2001 From: SZEDER Gábor Date: Wed, 1 Jul 2020 13:27:27 +0000 Subject: commit-graph: check chunk sizes after writing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In my experience while experimenting with new commit-graph chunks, early versions of the corresponding new write_commit_graph_my_chunk() functions are, sadly but not surprisingly, often buggy, and write more or less data than they are supposed to, especially if the chunk size is not directly proportional to the number of commits. This then causes all kinds of issues when reading such a bogus commit-graph file, raising the question of whether the writing or the reading part happens to be buggy this time. Let's catch such issues early, already when writing the commit-graph file, and check that each write_graph_chunk_*() function wrote the amount of data that it was expected to, and what has been encoded in the Chunk Lookup table. Now that all commit-graph chunks are written in a loop we can do this check in a single place for all chunks, and any chunks added in the future will get checked as well. Helped-by: René Scharfe Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 2b26a9dad3..6752916c1a 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1693,8 +1693,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } for (i = 0; i < num_chunks; i++) { + uint64_t start_offset = f->total + f->offset; + if (chunks[i].write_fn(f, ctx)) return -1; + + if (f->total + f->offset != start_offset + chunks[i].size) + BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead", + chunks[i].size, chunks[i].id, + f->total + f->offset - start_offset); } stop_progress(&ctx->progress); -- cgit v1.2.3