summary | refs | log | tree | commit | diff
path: root/range-diff.c
diff options
context:
space:
mode:
author: Paulo Casaretto <pcasaretto@gmail.com> 2025-08-29 16:02:54 +0000
committer: Junio C Hamano <gitster@pobox.com> 2025-08-29 09:46:07 -0700
commit: 00727249ec8404c68391ec58e9c9f0d8a88d5ca0 (patch)
tree: 466a3bbb6ff3c13198bf303df28df227bc23e98e /range-diff.c
parent: f814da676ae46aac5be0a98b99373a76dee6cedb (diff)
range-diff: add configurable memory limit for cost matrix
When comparing large commit ranges (e.g., 250,000+ commits), range-diff attempts to allocate an n×n cost matrix that can exhaust available memory. For example, with 256,784 commits in each range (n = 513,568), the matrix would require approximately 1TB of memory (513,568² × 4 bytes ≈ 1.05 × 10¹² bytes), causing either immediate segmentation faults due to integer overflow or system hangs.

Add a memory limit check in get_correspondences() before allocating the cost matrix. This check uses the total size in bytes (n² × sizeof(int)) and compares it against a configurable maximum, preventing both excessive memory usage and integer overflow issues.

The limit is configurable via a new --max-memory option that accepts human-readable sizes (e.g., "1G", "500M"). The default is 4GB for 64-bit systems and 2GB for 32-bit systems. This allows comparing ranges totalling approximately 32,000 (23,000) commits across both sides - generous for real-world use cases while preventing impractical operations.

When the limit is exceeded, range-diff now displays a clear error message showing both the requested memory size and the maximum allowed, formatted in human-readable units for better user experience.

Example usage:

    git range-diff --max-memory=1G branch1...branch2
    git range-diff --max-memory=500M base..topic1 base..topic2

This approach was chosen over alternatives:

- Pre-counting commits: Would require spawning additional git processes and reading all commits twice
- Limiting by commit count: Less precise than actual memory usage
- Streaming approach: Would require significant refactoring of the current algorithm

This issue was previously discussed in: https://lore.kernel.org/git/RFC-cover-v2-0.5-00000000000-20211210T122901Z-avarab@gmail.com/

Acked-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Paulo Casaretto <pcasaretto@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'range-diff.c')
-rw-r--r-- range-diff.c 20
1 files changed, 16 insertions, 4 deletions
diff --git a/range-diff.c b/range-diff.c
index 8a2dcbee32..ca449a0769 100644
--- a/range-diff.c
+++ b/range-diff.c
@@ -325,13 +325,24 @@ static int diffsize(const char *a, const char *b)
}
static void get_correspondences(struct string_list *a, struct string_list *b,
- int creation_factor)
+ int creation_factor, size_t max_memory)
{
int n = a->nr + b->nr;
int *cost, c, *a2b, *b2a;
int i, j;
-
- ALLOC_ARRAY(cost, st_mult(n, n));
+ size_t cost_size = st_mult(n, n);
+ size_t cost_bytes = st_mult(sizeof(int), cost_size);
+ if (cost_bytes >= max_memory) {
+ struct strbuf cost_str = STRBUF_INIT;
+ struct strbuf max_str = STRBUF_INIT;
+ strbuf_humanise_bytes(&cost_str, cost_bytes);
+ strbuf_humanise_bytes(&max_str, max_memory);
+ die(_("range-diff: unable to compute the range-diff, since it "
+ "exceeds the maximum memory for the cost matrix: %s "
+ "(%"PRIuMAX" bytes) needed, limited to %s (%"PRIuMAX" bytes)"),
+ cost_str.buf, (uintmax_t)cost_bytes, max_str.buf, (uintmax_t)max_memory);
+ }
+ ALLOC_ARRAY(cost, cost_size);
ALLOC_ARRAY(a2b, n);
ALLOC_ARRAY(b2a, n);
@@ -591,7 +602,8 @@ int show_range_diff(const char *range1, const char *range2,
if (!res) {
find_exact_matches(&branch1, &branch2);
get_correspondences(&branch1, &branch2,
- range_diff_opts->creation_factor);
+ range_diff_opts->creation_factor,
+ range_diff_opts->max_memory);
output(&branch1, &branch2, range_diff_opts);
}