From 570b8b883617df2acfedab88b9f5af0b50c821cd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 16:58:23 -0400 Subject: chunk-format: note that pair_chunk() is unsafe The pair_chunk() function is provided as an easy helper for parsing chunks that just want a pointer to a set of bytes. But every caller has a hidden bug: because we return only the pointer without the matching chunk size, the callers have no clue how many bytes they are allowed to look at. And as a result, they may read off the end of the mmap'd data when the on-disk file does not match their expectations. Since chunk files are typically used for local-repository data like commit-graph files and midx's, the security implications here are pretty mild. The worst that can happen is that you hand somebody a corrupted repository tarball, and running Git on it does an out-of-bounds read and crashes. So it's worth being more defensive, but we don't need to drop everything and fix every caller immediately. I noticed the problem because the pair_chunk_fn() callback does not look at its chunk_size argument, and wanted to annotate it to silence -Wunused-parameter. We could do that now, but we'd lose the hint that this code should be audited and fixed. So instead, let's set ourselves up for going down that path: 1. Provide a pair_chunk() function that does return the size, which prepares us for fixing these cases. 2. Rename the existing function to pair_chunk_unsafe(). That gives us an easy way to grep for cases which still need to be fixed, and the name should cause anybody adding new calls to think twice before using it. There are no callers of the "safe" version yet, but we'll add some in subsequent patches. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 1a56efcf69..a689a55b79 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -394,17 +394,17 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, GRAPH_HEADER_SIZE, graph->num_chunks)) goto free_and_return; - pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, + pair_chunk_unsafe(cf, GRAPH_CHUNKID_OIDFANOUT, (const unsigned char **)&graph->chunk_oid_fanout); read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph); - pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data); - pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges); - pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); + pair_chunk_unsafe(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data); + pair_chunk_unsafe(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges); + pair_chunk_unsafe(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); if (s->commit_graph_generation_version >= 2) { - pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, + pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA, &graph->chunk_generation_data); - pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, + pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, &graph->chunk_generation_data_overflow); if (graph->chunk_generation_data) @@ -412,7 +412,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, } if (s->commit_graph_read_changed_paths) { - pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, + pair_chunk_unsafe(cf, GRAPH_CHUNKID_BLOOMINDEXES, &graph->chunk_bloom_indexes); read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA, graph_read_bloom_data, graph); -- cgit v1.2.3 From 52e2e8d43dbae8c05b68efd60cde2aacf3a23890 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 16:59:51 -0400 Subject: commit-graph: check size of oid fanout chunk We load the oid fanout chunk with pair_chunk(), which means we never see the size of the chunk. We just assume the on-disk file uses the appropriate size, and if it's too small we'll access random memory. It's easy to check this up-front; the fanout always consists of 256 uint32's, since it is a fanout of the first byte of the hash pointing into the oid index. These parameters can't be changed without introducing a new chunk type. This matches the similar check in the midx OIDF chunk (but note that rather than checking for the error immediately, the graph code just leaves parts of the struct NULL and checks for required fields later). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 13 +++++++++++-- t/t5318-commit-graph.sh | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index a689a55b79..9b3b01da61 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -305,6 +305,16 @@ static int verify_commit_graph_lite(struct commit_graph *g) return 0; } +static int graph_read_oid_fanout(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + if (chunk_size != 256 * sizeof(uint32_t)) + return error("commit-graph oid fanout chunk is wrong size"); + g->chunk_oid_fanout = (const uint32_t *)chunk_start; + return 0; +} + static int graph_read_oid_lookup(const unsigned char *chunk_start, size_t chunk_size, void *data) { @@ -394,8 +404,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, GRAPH_HEADER_SIZE, graph->num_chunks)) goto free_and_return; - pair_chunk_unsafe(cf, GRAPH_CHUNKID_OIDFANOUT, - (const unsigned char **)&graph->chunk_oid_fanout); + read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph); read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph); pair_chunk_unsafe(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data); pair_chunk_unsafe(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index ba65f17dd9..d25bea3ec5 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -2,6 +2,7 @@ test_description='commit graph' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 @@ -821,4 +822,29 @@ test_expect_success 'overflow during generation version upgrade' ' ) ' +corrupt_chunk () { + graph=full/.git/objects/info/commit-graph && + test_when_finished "rm -rf $graph" && + git -C full commit-graph write --reachable && + corrupt_chunk_file $graph "$@" +} + +check_corrupt_chunk () { + corrupt_chunk "$@" && + git -C full -c core.commitGraph=false log >expect.out && + git -C full -c core.commitGraph=true log >out 2>err && + test_cmp expect.out out +} + +test_expect_success 'reader notices too-small oid fanout chunk' ' + # make it big enough that the graph file is plausible, + # otherwise we hit an earlier check + check_corrupt_chunk OIDF clear $(printf "000000%02x" $(test_seq 250)) && + cat >expect.err <<-\EOF && + error: commit-graph oid fanout chunk is wrong size + error: commit-graph is missing the OID Fanout chunk + EOF + test_cmp expect.err err +' + test_done -- cgit v1.2.3 From 4169d8964523198ca89f507824c07b70ba833732 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:04:58 -0400 Subject: commit-graph: check consistency of fanout table We use bsearch_hash() to look up items in the oid index of a commit-graph. It also has a fanout table to reduce the initial range in which we'll search. But since the fanout comes from the on-disk file, a corrupted or malicious file can cause us to look outside of the allocated index memory. One solution here would be to pass the total table size to bsearch_hash(), which could then bounds check the values it reads from the fanout. But there's an inexpensive up-front check we can do, and it's the same one used by the midx and pack idx code (both of which likewise have fanout tables and use bsearch_hash(), but are not affected by this bug): 1. We can check the value of the final fanout entry against the size of the table we got from the index chunk. These must always match, since the fanout is just slicing up the index. As a side note, the midx and pack idx code compute it the other way around: they use the final fanout value as the object count, and check the index size against it. Either is valid; if they disagree we cannot know which is wrong (a corrupted fanout value, or a too-small table of oids). 2. We can quickly scan the fanout table to make sure it is monotonically increasing. If it is, then we know that every value is less than or equal to the final value, and therefore less than or equal to the table size. It would also be sufficient to just check that each fanout value is smaller than the final one, but the midx and pack idx code both do a full monotonicity check. It's the same cost, and it catches some other corruptions (though not all; the checks done by "commit-graph verify" are more complete but more expensive, and our goal here is to be fast and memory-safe). There are two new tests. One just checks the final fanout value (this is the mirror image of the "too small oid lookup" case added for the midx in the previous commit; it's flipped here because commit-graph considers the oid lookup chunk to be the source of truth). The other actually creates a fanout with many out-of-bounds entries, and prior to this patch, it does cause the segfault you'd expect. But note that the error is not "your fanout entry is out-of-bounds", but rather "fanout value out of order". That's because we leave the final fanout value in place (to get past the table size check), making the index non-monotonic (the second-to-last entry is big, but the last one must remain small to match the actual table). We need adjustments to a few existing tests, as well: - an earlier test in t5318 corrupts the fanout and runs "commit-graph verify". Its message is now changed, since we catch the problem earlier (during the load step, rather than the careful validation step). - in t5324, we test that "commit-graph verify --shallow" does not do expensive verification on the base file of the chain. But the corruption it uses (munging a byte at offset 1000) happens to be in the middle of the fanout table. And now we detect that problem in the cheaper checks that are performed for every part of the graph. We'll push this back to offset 1500, which is only caught by the more expensive checksum validation. Likewise, there's a later test in t5324 which munges an offset 100 bytes into a file (also in the fanout table) that is referenced by an alternates file. So we now find that corruption during the load step, rather than the verification step. At the very least we need to change the error message (like the case above in t5318). But it is probably good to make sure we handle all parts of the verification even for alternate graph files. So let's likewise corrupt byte 1500 and make sure we found the invalid checksum. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 16 ++++++++++++++++ t/t5318-commit-graph.sh | 25 ++++++++++++++++++++++++- t/t5324-split-commit-graph.sh | 6 +++--- 3 files changed, 43 insertions(+), 4 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 9b3b01da61..b217e19194 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -277,6 +277,8 @@ struct commit_graph *load_commit_graph_one_fd_st(struct repository *r, static int verify_commit_graph_lite(struct commit_graph *g) { + int i; + /* * Basic validation shared between parse_commit_graph() * which'll be called every time the graph is used, and the @@ -302,6 +304,20 @@ static int verify_commit_graph_lite(struct commit_graph *g) return 1; } + for (i = 0; i < 255; i++) { + uint32_t oid_fanout1 = ntohl(g->chunk_oid_fanout[i]); + uint32_t oid_fanout2 = ntohl(g->chunk_oid_fanout[i + 1]); + + if (oid_fanout1 > oid_fanout2) { + error("commit-graph fanout values out of order"); + return 1; + } + } + if (ntohl(g->chunk_oid_fanout[255]) != g->num_commits) { + error("commit-graph oid table and fanout disagree on size"); + return 1; + } + return 0; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index d25bea3ec5..d10658de9e 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -560,7 +560,7 @@ test_expect_success 'detect incorrect fanout' ' test_expect_success 'detect incorrect fanout final value' ' corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \ - "fanout value" + "oid table and fanout disagree on size" ' test_expect_success 'detect incorrect OID order' ' @@ -847,4 +847,27 @@ test_expect_success 'reader notices too-small oid fanout chunk' ' test_cmp expect.err err ' +test_expect_success 'reader notices fanout/lookup table mismatch' ' + check_corrupt_chunk OIDF 1020 "FFFFFFFF" && + cat >expect.err <<-\EOF && + error: commit-graph oid table and fanout disagree on size + EOF + test_cmp expect.err err +' + +test_expect_success 'reader notices out-of-bounds fanout' ' + # Rather than try to corrupt a specific hash, we will just + # wreck them all. But we cannot just set them all to 0xFFFFFFFF or + # similar, as they are used for hi/lo starts in a binary search (so if + # they are identical, that indicates that the search should abort + # immediately). Instead, we will give them high values that differ by + # 2^24, ensuring that any that are used would cause an out-of-bounds + # read. + check_corrupt_chunk OIDF 0 $(printf "%02x000000" $(test_seq 0 254)) && + cat >expect.err <<-\EOF && + error: commit-graph fanout values out of order + EOF + test_cmp expect.err err +' + test_done diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 06bb897f02..55b5765e2d 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -317,7 +317,7 @@ test_expect_success 'verify --shallow does not check base contents' ' cd verify-shallow && git commit-graph verify && base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && - corrupt_file "$base_file" 1000 "\01" && + corrupt_file "$base_file" 1500 "\01" && git commit-graph verify --shallow && test_must_fail git commit-graph verify 2>test_err && grep -v "^+" test_err >err && @@ -391,10 +391,10 @@ test_expect_success 'verify across alternates' ' test_commit extra && git commit-graph write --reachable --split && tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && - corrupt_file "$tip_file" 100 "\01" && + corrupt_file "$tip_file" 1500 "\01" && test_must_fail git commit-graph verify --shallow 2>test_err && grep -v "^+" test_err >err && - test_i18ngrep "commit-graph has incorrect fanout value" err + test_i18ngrep "incorrect checksum" err ) ' -- cgit v1.2.3 From c9b9fefc13ccce7ed248488c982d1da38b0905c7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:23 -0400 Subject: midx: enforce chunk alignment on reading The midx reader assumes chunks are aligned to a 4-byte boundary: we treat the fanout chunk as an array of uint32_t, indexing it to feed the results to ntohl(). Without aligning the chunks, we may violate the CPU's alignment constraints. Though many platforms allow this, some do not. And certanily UBSan will complain, since it is undefined behavior. Even though most chunks are naturally 4-byte-aligned (because they are storing uint32_t or larger types), PNAM is not. It stores NUL-terminated pack names, so you can have a valid chunk with any length. The writing side handles this by 4-byte-aligning the chunk, introducing a few extra NULs as necessary. But since we don't check this on the reading side, we may end up with a misaligned fanout and trigger the undefined behavior. We have two options here: 1. Swap out ntohl(fanout[i]) for get_be32(fanout+i) everywhere. The latter handles alignment itself. It's possible that it's slightly slower (though in practice I'm not sure how true that is, especially for these code paths which then go on to do a binary search). 2. Enforce the alignment when reading the chunks. This is easy to do, since the table-of-contents reader can check it in one spot. I went with the second option here, just because it places less burden on maintenance going forward (it is OK to continue using ntohl), and we know it can't have any performance impact on the actual reads. The commit-graph code uses the same chunk API. It's usually also 4-byte aligned, but some chunks are not (like Bloom filter BDAT chunks). So we'll pass "1" here to allow any alignment. It doesn't suffer from the same problem as midx with its fanout because the fanout chunk is always the first (and the rest of the format dictates that the first chunk will start aligned). The new test shows the effect on a midx with a misaligned PNAM chunk. Note that the midx-reading code treats chunk-toc errors as soft, falling back to the non-midx path rather than calling die(), as we do for other parsing errors. Arguably we should make all of these behave the same, but that's out of scope for this patch. For now the test just expects the fallback behavior. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- chunk-format.c | 8 +++++++- chunk-format.h | 3 ++- commit-graph.c | 2 +- midx.c | 3 ++- t/t5319-multi-pack-index.sh | 14 ++++++++++++++ 5 files changed, 26 insertions(+), 4 deletions(-) (limited to 'commit-graph.c') diff --git a/chunk-format.c b/chunk-format.c index 8d910e23f6..09ef86afa6 100644 --- a/chunk-format.c +++ b/chunk-format.c @@ -102,7 +102,8 @@ int read_table_of_contents(struct chunkfile *cf, const unsigned char *mfile, size_t mfile_size, uint64_t toc_offset, - int toc_length) + int toc_length, + unsigned expected_alignment) { int i; uint32_t chunk_id; @@ -120,6 +121,11 @@ int read_table_of_contents(struct chunkfile *cf, error(_("terminating chunk id appears earlier than expected")); return 1; } + if (chunk_offset % expected_alignment != 0) { + error(_("chunk id %"PRIx32" not %d-byte aligned"), + chunk_id, expected_alignment); + return 1; + } table_of_contents += CHUNK_TOC_ENTRY_SIZE; next_chunk_offset = get_be64(table_of_contents + 4); diff --git a/chunk-format.h b/chunk-format.h index 8dce7967f4..d608b8135c 100644 --- a/chunk-format.h +++ b/chunk-format.h @@ -36,7 +36,8 @@ int read_table_of_contents(struct chunkfile *cf, const unsigned char *mfile, size_t mfile_size, uint64_t toc_offset, - int toc_length); + int toc_length, + unsigned expected_alignment); #define CHUNK_NOT_FOUND (-2) diff --git a/commit-graph.c b/commit-graph.c index b217e19194..472332f603 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -417,7 +417,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, cf = init_chunkfile(NULL); if (read_table_of_contents(cf, graph->data, graph_size, - GRAPH_HEADER_SIZE, graph->num_chunks)) + GRAPH_HEADER_SIZE, graph->num_chunks, 1)) goto free_and_return; read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph); diff --git a/midx.c b/midx.c index ec585cae1b..98f84fbe61 100644 --- a/midx.c +++ b/midx.c @@ -154,7 +154,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local cf = init_chunkfile(NULL); if (read_table_of_contents(cf, m->data, midx_size, - MIDX_HEADER_SIZE, m->num_chunks)) + MIDX_HEADER_SIZE, m->num_chunks, + MIDX_CHUNK_ALIGNMENT)) goto cleanup_fail; if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len)) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 0a0ccec8a4..34f5944142 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1094,4 +1094,18 @@ test_expect_success 'reader notices too-small pack names chunk' ' test_cmp expect err ' +test_expect_success 'reader handles unaligned chunks' ' + # A 9-byte PNAM means all of the subsequent chunks + # will no longer be 4-byte aligned, but it is still + # a valid one-pack chunk on its own (it is "foo.pack\0"). + corrupt_chunk PNAM clear 666f6f2e7061636b00 && + git -c core.multipackindex=false log >expect.out && + git -c core.multipackindex=true log >out 2>err && + test_cmp expect.out out && + cat >expect.err <<-\EOF && + error: chunk id 4f494446 not 4-byte aligned + EOF + test_cmp expect.err err +' + test_done -- cgit v1.2.3 From b72df612afc12b46ea003732d739d7d746871773 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:36 -0400 Subject: commit-graph: check size of commit data chunk We expect a commit-graph file to have a fixed-size data record for each commit in the file (and we know the number of commits to expct from the size of the lookup table). If we encounter a file where this is too small, we'll look past the end of the chunk (and possibly even off the mapped memory). We can fix this by checking the size up front when we record the pointer. The included test doesn't segfault, since it ends up reading bytes from another chunk. But it produces nonsense results, since the values it reads are garbage. Our test notices this by comparing the output to a non-corrupted run of the same command (and of course we also check that the expected error is printed to stderr). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 12 +++++++++++- t/t5318-commit-graph.sh | 9 +++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 472332f603..9b80bbd75b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -340,6 +340,16 @@ static int graph_read_oid_lookup(const unsigned char *chunk_start, return 0; } +static int graph_read_commit_data(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + if (chunk_size != g->num_commits * GRAPH_DATA_WIDTH) + return error("commit-graph commit data chunk is wrong size"); + g->chunk_commit_data = chunk_start; + return 0; +} + static int graph_read_bloom_data(const unsigned char *chunk_start, size_t chunk_size, void *data) { @@ -422,7 +432,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph); read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph); - pair_chunk_unsafe(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data); + read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph); pair_chunk_unsafe(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges); pair_chunk_unsafe(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index d10658de9e..492460157d 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -870,4 +870,13 @@ test_expect_success 'reader notices out-of-bounds fanout' ' test_cmp expect.err err ' +test_expect_success 'reader notices too-small commit data chunk' ' + check_corrupt_chunk CDAT clear 00000000 && + cat >expect.err <<-\EOF && + error: commit-graph commit data chunk is wrong size + error: commit-graph is missing the Commit Data chunk + EOF + test_cmp expect.err err +' + test_done -- cgit v1.2.3 From 9622610e55c7d4f81a924387947884b2ac268934 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:38 -0400 Subject: commit-graph: detect out-of-bounds extra-edges pointers If an entry in a commit-graph file has more than 2 parents, the fixed-size parent fields instead point to an offset within an "extra edges" chunk. We blindly follow these, assuming that the chunk is present and sufficiently large; this can lead to an out-of-bounds read for a corrupt or malicious file. We can fix this by recording the size of the chunk and adding a bounds-check in fill_commit_in_graph(). There are a few tricky bits: 1. We'll switch from working with a pointer to an offset. This makes some corner cases just fall out naturally: a. If we did not find an EDGE chunk at all, our size will correctly be zero (so everything is "out of bounds"). b. Comparing "size / 4" lets us make sure we have at least 4 bytes to read, and we never compute a pointer more than one element past the end of the array (computing a larger pointer is probably OK in practice, but is technically undefined behavior). c. The current code casts to "uint32_t *". Replacing it with an offset avoids any comparison between different types of pointer (since the chunk is stored as "unsigned char *"). 2. This is the first case in which fill_commit_in_graph() may return anything but success. We need to make sure to roll back the "parsed" flag (and any parents we might have added before running out of buffer) so that the caller can cleanly fall back to loading the commit object itself. It's a little non-trivial to do this, and we might benefit from factoring it out. But we can wait on that until we actually see a second case where we return an error. As a bonus, this lets us drop the st_mult() call. Since we've already done a bounds check, we know there won't be any integer overflow (it would imply our buffer is larger than a size_t can hold). The included test does not actually segfault before this patch (though you could construct a case where it does). Instead, it reads garbage from the next chunk which results in it complaining about a bogus parent id. This is sufficient for our needs, though (we care that the fallback succeeds, and that stderr mentions the out-of-bounds read). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 20 ++++++++++++++------ commit-graph.h | 1 + t/t5318-commit-graph.sh | 8 ++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 9b80bbd75b..e4860841fc 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -433,7 +433,8 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph); read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph); read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph); - pair_chunk_unsafe(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges); + pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges, + &graph->chunk_extra_edges_size); pair_chunk_unsafe(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); if (s->commit_graph_generation_version >= 2) { @@ -899,7 +900,7 @@ static int fill_commit_in_graph(struct repository *r, struct commit_graph *g, uint32_t pos) { uint32_t edge_value; - uint32_t *parent_data_ptr; + uint32_t parent_data_pos; struct commit_list **pptr; const unsigned char *commit_data; uint32_t lex_index; @@ -931,14 +932,21 @@ static int fill_commit_in_graph(struct repository *r, return 1; } - parent_data_ptr = (uint32_t*)(g->chunk_extra_edges + - st_mult(4, edge_value & GRAPH_EDGE_LAST_MASK)); + parent_data_pos = edge_value & GRAPH_EDGE_LAST_MASK; do { - edge_value = get_be32(parent_data_ptr); + if (g->chunk_extra_edges_size / sizeof(uint32_t) <= parent_data_pos) { + error("commit-graph extra-edges pointer out of bounds"); + free_commit_list(item->parents); + item->parents = NULL; + item->object.parsed = 0; + return 0; + } + edge_value = get_be32(g->chunk_extra_edges + + sizeof(uint32_t) * parent_data_pos); pptr = insert_parent_or_die(r, g, edge_value & GRAPH_EDGE_LAST_MASK, pptr); - parent_data_ptr++; + parent_data_pos++; } while (!(edge_value & GRAPH_LAST_EDGE)); return 1; diff --git a/commit-graph.h b/commit-graph.h index 20ada7e891..1f8a9de4fb 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -95,6 +95,7 @@ struct commit_graph { const unsigned char *chunk_generation_data; const unsigned char *chunk_generation_data_overflow; const unsigned char *chunk_extra_edges; + size_t chunk_extra_edges_size; const unsigned char *chunk_base_graphs; const unsigned char *chunk_bloom_indexes; const unsigned char *chunk_bloom_data; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 492460157d..05bafcfe5f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -879,4 +879,12 @@ test_expect_success 'reader notices too-small commit data chunk' ' test_cmp expect.err err ' +test_expect_success 'reader notices out-of-bounds extra edge' ' + check_corrupt_chunk EDGE clear && + cat >expect.err <<-\EOF && + error: commit-graph extra-edges pointer out of bounds + EOF + test_cmp expect.err err +' + test_done -- cgit v1.2.3 From 6cf61d0db55291c3b8406a6ba8f20fdfb9a4a344 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:41 -0400 Subject: commit-graph: bounds-check base graphs chunk When we are loading a commit-graph chain, we check that each slice of the chain points to the appropriate set of base graphs via its BASE chunk. But since we don't record the size of the chunk, we may access out-of-bounds memory if the file is corrupted. Since we know the number of entries we expect to find (based on the position within the commit-graph-chain file), we can just check the size up front. In theory this would also let us drop the st_mult() call a few lines later when we actually access the memory, since we know that the computed offset will fit in a size_t. But because the operands "g->hash_len" and "n" have types "unsigned char" and "int", we'd have to cast to size_t first. Leaving the st_mult() does that cast, and makes it more obvious that we don't have an overflow problem. Note that the test does not actually segfault before this patch, since it just reads garbage from the chunk after BASE (and indeed, it even rejects the file because that garbage does not have the expected hash value). You could construct a file with BASE at the end that did segfault, but corrupting the existing one is easy, and we can check stderr for the expected message. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 8 +++++++- commit-graph.h | 1 + t/t5324-split-commit-graph.sh | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index e4860841fc..4377b547c8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -435,7 +435,8 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph); pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges, &graph->chunk_extra_edges_size); - pair_chunk_unsafe(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); + pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs, + &graph->chunk_base_graphs_size); if (s->commit_graph_generation_version >= 2) { pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA, @@ -546,6 +547,11 @@ static int add_graph_to_chain(struct commit_graph *g, return 0; } + if (g->chunk_base_graphs_size / g->hash_len < n) { + warning(_("commit-graph base graphs chunk is too small")); + return 0; + } + while (n) { n--; diff --git a/commit-graph.h b/commit-graph.h index 1f8a9de4fb..e4248ea05d 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -97,6 +97,7 @@ struct commit_graph { const unsigned char *chunk_extra_edges; size_t chunk_extra_edges_size; const unsigned char *chunk_base_graphs; + size_t chunk_base_graphs_size; const unsigned char *chunk_bloom_indexes; const unsigned char *chunk_bloom_data; diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 55b5765e2d..3c8482d073 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -2,6 +2,7 @@ test_description='split commit graph' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_COMMIT_GRAPH=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 @@ -398,6 +399,19 @@ test_expect_success 'verify across alternates' ' ) ' +test_expect_success 'reader bounds-checks base-graph chunk' ' + git clone --no-hardlinks . corrupt-base-chunk && + ( + cd corrupt-base-chunk && + tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && + corrupt_chunk_file "$tip_file" BASE clear 01020304 && + git -c core.commitGraph=false log >expect.out && + git -c core.commitGraph=true log >out 2>err && + test_cmp expect.out out && + grep "commit-graph base graphs chunk is too small" err + ) +' + test_expect_success 'add octopus merge' ' git reset --hard commits/10 && git merge commits/3 commits/4 && -- cgit v1.2.3 From 4a3c34662bc56a0e2369635536ac2ee1e79d8f56 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:44 -0400 Subject: commit-graph: check size of generations chunk We neither check nor record the size of the generations chunk we parse from a commit-graph file. This should have one uint32_t for each commit in the file; if it is smaller (due to corruption, etc), we may read outside the mapped memory. The included test segfaults without this patch, as it shrinks the size considerably (and the chunk is near the end of the file, so we read off the end of the array rather than accidentally reading another chunk). We can fix this by checking the size up front (like we do for other fixed-size chunks, like CDAT). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 14 ++++++++++++-- t/t5318-commit-graph.sh | 8 ++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 4377b547c8..ca26870d1b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -350,6 +350,16 @@ static int graph_read_commit_data(const unsigned char *chunk_start, return 0; } +static int graph_read_generation_data(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + if (chunk_size != g->num_commits * sizeof(uint32_t)) + return error("commit-graph generations chunk is wrong size"); + g->chunk_generation_data = chunk_start; + return 0; +} + static int graph_read_bloom_data(const unsigned char *chunk_start, size_t chunk_size, void *data) { @@ -439,8 +449,8 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, &graph->chunk_base_graphs_size); if (s->commit_graph_generation_version >= 2) { - pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA, - &graph->chunk_generation_data); + read_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, + graph_read_generation_data, graph); pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, &graph->chunk_generation_data_overflow); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 05bafcfe5f..6505ff595a 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -887,4 +887,12 @@ test_expect_success 'reader notices out-of-bounds extra edge' ' test_cmp expect.err err ' +test_expect_success 'reader notices too-small generations chunk' ' + check_corrupt_chunk GDA2 clear 00000000 && + cat >expect.err <<-\EOF && + error: commit-graph generations chunk is wrong size + EOF + test_cmp expect.err err +' + test_done -- cgit v1.2.3 From ee6a7924124b2a9030da55c94a27829e410b3b64 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:47 -0400 Subject: commit-graph: bounds-check generation overflow chunk If the generation entry in a commit-graph doesn't fit, we instead insert an offset into a generation overflow chunk. But since we don't record the size of the chunk, we may read outside the chunk if the offset we find on disk is malicious or corrupted. We can't check the size of the chunk up-front; it will vary based on how many entries need overflow. So instead, we'll do a bounds-check before accessing the chunk memory. Unfortunately there is no error-return from this function, so we'll just have to die(), which is what it does for other forms of corruption. As with other cases, we can drop the st_mult() call, since we know our bounds-checked value will fit within a size_t. Before this patch, the test here actually "works" because we read garbage data from the next chunk. And since that garbage data happens not to provide a generation number which changes the output, it appears to work. We could construct a case that actually segfaults or produces wrong output, but it would be a bit tricky. For our purposes its sufficient to check that we've detected the bounds error. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 10 +++++++--- commit-graph.h | 1 + t/t5328-commit-graph-64bit-time.sh | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index ca26870d1b..f446e76c28 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -451,8 +451,9 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, if (s->commit_graph_generation_version >= 2) { read_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, graph_read_generation_data, graph); - pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, - &graph->chunk_generation_data_overflow); + pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, + &graph->chunk_generation_data_overflow, + &graph->chunk_generation_data_overflow_size); if (graph->chunk_generation_data) graph->read_generation_data = 1; @@ -896,7 +897,10 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, die(_("commit-graph requires overflow generation data but has none")); offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW; - graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + st_mult(8, offset_pos)); + if (g->chunk_generation_data_overflow_size / sizeof(uint64_t) <= offset_pos) + die(_("commit-graph overflow generation data is too small")); + graph_data->generation = item->date + + get_be64(g->chunk_generation_data_overflow + sizeof(uint64_t) * offset_pos); } else graph_data->generation = item->date + offset; } else diff --git a/commit-graph.h b/commit-graph.h index e4248ea05d..b373f15802 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -94,6 +94,7 @@ struct commit_graph { const unsigned char *chunk_commit_data; const unsigned char *chunk_generation_data; const unsigned char *chunk_generation_data_overflow; + size_t chunk_generation_data_overflow_size; const unsigned char *chunk_extra_edges; size_t chunk_extra_edges_size; const unsigned char *chunk_base_graphs; diff --git a/t/t5328-commit-graph-64bit-time.sh b/t/t5328-commit-graph-64bit-time.sh index e9c521c061..e5ff3e07ad 100755 --- a/t/t5328-commit-graph-64bit-time.sh +++ b/t/t5328-commit-graph-64bit-time.sh @@ -10,6 +10,7 @@ then fi . "$TEST_DIRECTORY"/lib-commit-graph.sh +. "$TEST_DIRECTORY/lib-chunk.sh" UNIX_EPOCH_ZERO="@0 +0000" FUTURE_DATE="@4147483646 +0000" @@ -72,4 +73,13 @@ test_expect_success 'single commit with generation data exceeding UINT32_MAX' ' git -C repo-uint32-max commit-graph verify ' +test_expect_success 'reader notices out-of-bounds generation overflow' ' + graph=.git/objects/info/commit-graph && + test_when_finished "rm -rf $graph" && + git commit-graph write --reachable && + corrupt_chunk_file $graph GDO2 clear && + test_must_fail git log 2>err && + grep "commit-graph overflow generation data is too small" err +' + test_done -- cgit v1.2.3 From 920f400e919c7c51f81adc6989cdd52630220783 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:50 -0400 Subject: commit-graph: check bounds when accessing BDAT chunk When loading Bloom filters from a commit-graph file, we use the offset values in the BIDX chunk to index into the memory mapped for the BDAT chunk. But since we don't record how big the BDAT chunk is, we just trust that the BIDX offsets won't cause us to read outside of the chunk memory. A corrupted or malicious commit-graph file will cause us to segfault (in practice this isn't a very interesting attack, since commit-graph files are local-only, and the worst case is an out-of-bounds read). We can't fix this by checking the chunk size during parsing, since the data in the BDAT chunk doesn't have a fixed size (that's why we need the BIDX in the first place). So we'll fix it in two parts: 1. Record the BDAT chunk size during parsing, and then later check that the BIDX offsets we look up are within bounds. 2. Because the offsets are relative to the end of the BDAT header, we must also make sure that the BDAT chunk is at least as large as the expected header size. Otherwise, we overflow when trying to move past the header, even for an offset of "0". We can check this early, during the parsing stage. The error messages are rather verbose, but since this is not something you'd expect to see outside of severe bugs or corruption, it makes sense to err on the side of too many details. Sadly we can't mention the filename during the chunk-parsing stage, as we haven't set g->filename at this point, nor passed it down through the stack. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- bloom.c | 24 ++++++++++++++++++++++++ commit-graph.c | 10 ++++++++++ commit-graph.h | 1 + t/t4216-log-bloom.sh | 28 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+) (limited to 'commit-graph.c') diff --git a/bloom.c b/bloom.c index aef6b5fea2..61abad7f8c 100644 --- a/bloom.c +++ b/bloom.c @@ -29,6 +29,26 @@ static inline unsigned char get_bitmask(uint32_t pos) return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); } +static int check_bloom_offset(struct commit_graph *g, uint32_t pos, + uint32_t offset) +{ + /* + * Note that we allow offsets equal to the data size, which would set + * our pointers at one past the end of the chunk memory. This is + * necessary because the on-disk index points to the end of the + * entries (so we can compute size by comparing adjacent ones). And + * naturally the final entry's end is one-past-the-end of the chunk. + */ + if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE) + return 0; + + warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path" + " filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")", + (uintmax_t)offset, (uintmax_t)pos, + g->filename, (uintmax_t)g->chunk_bloom_data_size); + return -1; +} + static int load_bloom_filter_from_graph(struct commit_graph *g, struct bloom_filter *filter, uint32_t graph_pos) @@ -51,6 +71,10 @@ static int load_bloom_filter_from_graph(struct commit_graph *g, else start_index = 0; + if (check_bloom_offset(g, lex_pos, end_index) < 0 || + check_bloom_offset(g, lex_pos - 1, start_index) < 0) + return 0; + filter->len = end_index - start_index; filter->data = (unsigned char *)(g->chunk_bloom_data + sizeof(unsigned char) * start_index + diff --git a/commit-graph.c b/commit-graph.c index f446e76c28..f7a42be6d0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -365,7 +365,17 @@ static int graph_read_bloom_data(const unsigned char *chunk_start, { struct commit_graph *g = data; uint32_t hash_version; + + if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) { + warning("ignoring too-small changed-path chunk" + " (%"PRIuMAX" < %"PRIuMAX") in commit-graph file", + (uintmax_t)chunk_size, + (uintmax_t)BLOOMDATA_CHUNK_HEADER_SIZE); + return -1; + } + g->chunk_bloom_data = chunk_start; + g->chunk_bloom_data_size = chunk_size; hash_version = get_be32(chunk_start); if (hash_version != 1) diff --git a/commit-graph.h b/commit-graph.h index b373f15802..c6870274c5 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -101,6 +101,7 @@ struct commit_graph { size_t chunk_base_graphs_size; const unsigned char *chunk_bloom_indexes; const unsigned char *chunk_bloom_data; + size_t chunk_bloom_data_size; struct topo_level_slab *topo_levels; struct bloom_filter_settings *bloom_filter_settings; diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index fa9d32facf..7a727bcddd 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -5,6 +5,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_COMMIT_GRAPH=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 @@ -404,4 +405,31 @@ test_expect_success 'Bloom generation backfills empty commits' ' ) ' +corrupt_graph () { + graph=.git/objects/info/commit-graph && + test_when_finished "rm -rf $graph" && + git commit-graph write --reachable --changed-paths && + corrupt_chunk_file $graph "$@" +} + +check_corrupt_graph () { + corrupt_graph "$@" && + git -c core.commitGraph=false log -- A/B/file2 >expect.out && + git -c core.commitGraph=true log -- A/B/file2 >out 2>err && + test_cmp expect.out out +} + +test_expect_success 'Bloom reader notices too-small data chunk' ' + check_corrupt_graph BDAT clear 00000000 && + echo "warning: ignoring too-small changed-path chunk" \ + "(4 < 12) in commit-graph file" >expect.err && + test_cmp expect.err err +' + +test_expect_success 'Bloom reader notices out-of-bounds filter offsets' ' + check_corrupt_graph BIDX 12 FFFFFFFF && + # use grep to avoid depending on exact chunk size + grep "warning: ignoring out-of-range offset (4294967295) for changed-path filter at pos 3 of .git/objects/info/commit-graph" err +' + test_done -- cgit v1.2.3 From 581e0f8b189b5b3d50aae16be09941ab6fccf335 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:53 -0400 Subject: commit-graph: check bounds when accessing BIDX chunk We load the bloom_filter_indexes chunk using pair_chunk(), so we have no idea how big it is. This can lead to out-of-bounds reads if it is smaller than expected, since we index it based on the number of commits found elsewhere in the graph file. We can check the chunk size up front, like we do for CDAT and other chunks with one fixed-size record per commit. The test case demonstrates the problem. It actually won't segfault, because we end up reading random data from the follow-on chunk (BDAT in this case), and the bounds checks added in the previous patch complain. But this is by no means assured, and you can craft a commit-graph file with BIDX at the end (or a smaller BDAT) that does segfault. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 16 ++++++++++++++-- t/t4216-log-bloom.sh | 9 +++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index f7a42be6d0..1f334987b5 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -360,6 +360,18 @@ static int graph_read_generation_data(const unsigned char *chunk_start, return 0; } +static int graph_read_bloom_index(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct commit_graph *g = data; + if (chunk_size != g->num_commits * 4) { + warning("commit-graph changed-path index chunk is too small"); + return -1; + } + g->chunk_bloom_indexes = chunk_start; + return 0; +} + static int graph_read_bloom_data(const unsigned char *chunk_start, size_t chunk_size, void *data) { @@ -470,8 +482,8 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, } if (s->commit_graph_read_changed_paths) { - pair_chunk_unsafe(cf, GRAPH_CHUNKID_BLOOMINDEXES, - &graph->chunk_bloom_indexes); + read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, + graph_read_bloom_index, graph); read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA, graph_read_bloom_data, graph); } diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index 7a727bcddd..f6054cbb27 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -432,4 +432,13 @@ test_expect_success 'Bloom reader notices out-of-bounds filter offsets' ' grep "warning: ignoring out-of-range offset (4294967295) for changed-path filter at pos 3 of .git/objects/info/commit-graph" err ' +test_expect_success 'Bloom reader notices too-small index chunk' ' + # replace the index with a single entry, making most + # lookups out-of-bounds + check_corrupt_graph BIDX clear 00000000 && + echo "warning: commit-graph changed-path index chunk" \ + "is too small" >expect.err && + test_cmp expect.err err +' + test_done -- cgit v1.2.3