diff options
author | Junio C Hamano <gitster@pobox.com> | 2025-07-28 12:02:34 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2025-07-28 12:02:34 -0700 |
commit | 0f6e5037d40db4768e8b61aea22c68c9711ce544 (patch) | |
tree | cf43012483aec4b0ac9d57080dd8c055b6cbf626 | |
parent | e4ef0485fd78fcb05866ea78df35796b904e4a8e (diff) | |
parent | a79e3519d66671a408041dac8c56d99078de41f2 (diff) |
Merge branch 'rs/pop-recent-commit-with-prio-queue'
The pop_most_recent_commit() function can have quite expensive
worst case performance characteristics, which has been optimized by
using prio-queue data structure.
* rs/pop-recent-commit-with-prio-queue:
commit: use prio_queue_replace() in pop_most_recent_commit()
prio-queue: add prio_queue_replace()
commit: convert pop_most_recent_commit() to prio_queue
-rw-r--r-- | commit.c | 14 | ||||
-rw-r--r-- | commit.h | 8 | ||||
-rw-r--r-- | fetch-pack.c | 13 | ||||
-rw-r--r-- | object-name.c | 10 | ||||
-rw-r--r-- | prio-queue.c | 45 | ||||
-rw-r--r-- | prio-queue.h | 8 | ||||
-rw-r--r-- | t/meson.build | 1 | ||||
-rwxr-xr-x | t/perf/p1501-rev-parse-oneline.sh | 71 | ||||
-rw-r--r-- | t/unit-tests/u-prio-queue.c | 23 | ||||
-rw-r--r-- | walker.c | 11 |
10 files changed, 170 insertions, 34 deletions
@@ -31,6 +31,7 @@ #include "parse.h" #include "object-file.h" #include "object-file-convert.h" +#include "prio-queue.h" static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); @@ -739,20 +740,27 @@ void commit_list_sort_by_date(struct commit_list **list) commit_list_sort(list, commit_list_compare_by_date); } -struct commit *pop_most_recent_commit(struct commit_list **list, +struct commit *pop_most_recent_commit(struct prio_queue *queue, unsigned int mark) { - struct commit *ret = pop_commit(list); + struct commit *ret = prio_queue_peek(queue); + int get_pending = 1; struct commit_list *parents = ret->parents; while (parents) { struct commit *commit = parents->item; if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) { commit->object.flags |= mark; - commit_list_insert_by_date(commit, list); + if (get_pending) + prio_queue_replace(queue, commit); + else + prio_queue_put(queue, commit); + get_pending = 0; } parents = parents->next; } + if (get_pending) + prio_queue_get(queue); return ret; } @@ -201,10 +201,10 @@ const char *repo_logmsg_reencode(struct repository *r, const char *skip_blank_lines(const char *msg); -/** Removes the first commit from a list sorted by date, and adds all - * of its parents. - **/ -struct commit *pop_most_recent_commit(struct commit_list **list, +struct prio_queue; + +/* Removes the first commit from a prio_queue and adds its parents. */ +struct commit *pop_most_recent_commit(struct prio_queue *queue, unsigned int mark); struct commit *pop_commit(struct commit_list **stack); diff --git a/fetch-pack.c b/fetch-pack.c index 01d3699c4b..c1be9b76eb 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -34,6 +34,7 @@ #include "commit-graph.h" #include "sigchain.h" #include "mergesort.h" +#include "prio-queue.h" static int transfer_unpack_limit = -1; static int fetch_unpack_limit = -1; @@ -601,7 +602,7 @@ done: return count ? retval : 0; } -static struct commit_list *complete; +static struct prio_queue complete = { compare_commits_by_commit_date }; static int mark_complete(const struct object_id *oid) { @@ -609,7 +610,7 @@ static int mark_complete(const struct object_id *oid) if (commit && !(commit->object.flags & COMPLETE)) { commit->object.flags |= COMPLETE; - commit_list_insert(commit, &complete); + prio_queue_put(&complete, commit); } return 0; } @@ -626,9 +627,12 @@ static int mark_complete_oid(const char *refname UNUSED, static void mark_recent_complete_commits(struct fetch_pack_args *args, timestamp_t cutoff) { - while (complete && cutoff <= complete->item->date) { + while (complete.nr) { + struct commit *item = prio_queue_peek(&complete); + if (item->date < cutoff) + break; print_verbose(args, _("Marking %s as complete"), - oid_to_hex(&complete->item->object.oid)); + oid_to_hex(&item->object.oid)); pop_most_recent_commit(&complete, COMPLETE); } } @@ -798,7 +802,6 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, refs_for_each_rawref(get_main_ref_store(the_repository), mark_complete_oid, NULL); for_each_cached_alternate(NULL, mark_alternate_complete); - commit_list_sort_by_date(&complete); if (cutoff) mark_recent_complete_commits(args, cutoff); } diff --git a/object-name.c b/object-name.c index ddafe7f9b1..41930609e3 100644 --- a/object-name.c +++ b/object-name.c @@ -28,6 +28,7 @@ #include "commit-reach.h" #include "date.h" #include "object-file-convert.h" +#include "prio-queue.h" static int get_oid_oneline(struct repository *r, const char *, struct object_id *, const struct commit_list *); @@ -1461,7 +1462,7 @@ static int get_oid_oneline(struct repository *r, const char *prefix, struct object_id *oid, const struct commit_list *list) { - struct commit_list *copy = NULL, **copy_tail = © + struct prio_queue copy = { compare_commits_by_commit_date }; const struct commit_list *l; int found = 0; int negative = 0; @@ -1483,9 +1484,9 @@ static int get_oid_oneline(struct repository *r, for (l = list; l; l = l->next) { l->item->object.flags |= ONELINE_SEEN; - copy_tail = &commit_list_insert(l->item, copy_tail)->next; + prio_queue_put(©, l->item); } - while (copy) { + while (copy.nr) { const char *p, *buf; struct commit *commit; int matches; @@ -1507,7 +1508,7 @@ static int get_oid_oneline(struct repository *r, regfree(®ex); for (l = list; l; l = l->next) clear_commit_marks(l->item, ONELINE_SEEN); - free_commit_list(copy); + clear_prio_queue(©); return found ? 0 : -1; } @@ -2061,7 +2062,6 @@ static enum get_oid_result get_oid_with_context_1(struct repository *repo, cb.list = &list; refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb); refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb); - commit_list_sort_by_date(&list); ret = get_oid_oneline(repo, name + 2, oid, list); free_commit_list(list); diff --git a/prio-queue.c b/prio-queue.c index ec33ac27db..9748528ce6 100644 --- a/prio-queue.c +++ b/prio-queue.c @@ -58,22 +58,10 @@ void prio_queue_put(struct prio_queue *queue, void *thing) } } -void *prio_queue_get(struct prio_queue *queue) +static void sift_down_root(struct prio_queue *queue) { - void *result; size_t ix, child; - if (!queue->nr) - return NULL; - if (!queue->compare) - return queue->array[--queue->nr].data; /* LIFO */ - - result = queue->array[0].data; - if (!--queue->nr) - return result; - - queue->array[0] = queue->array[queue->nr]; - /* Push down the one at the root */ for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) { child = ix * 2 + 1; /* left */ @@ -86,6 +74,23 @@ void *prio_queue_get(struct prio_queue *queue) swap(queue, child, ix); } +} + +void *prio_queue_get(struct prio_queue *queue) +{ + void *result; + + if (!queue->nr) + return NULL; + if (!queue->compare) + return queue->array[--queue->nr].data; /* LIFO */ + + result = queue->array[0].data; + if (!--queue->nr) + return result; + + queue->array[0] = queue->array[queue->nr]; + sift_down_root(queue); return result; } @@ -97,3 +102,17 @@ void *prio_queue_peek(struct prio_queue *queue) return queue->array[queue->nr - 1].data; return queue->array[0].data; } + +void prio_queue_replace(struct prio_queue *queue, void *thing) +{ + if (!queue->nr) { + prio_queue_put(queue, thing); + } else if (!queue->compare) { + queue->array[queue->nr - 1].ctr = queue->insertion_ctr++; + queue->array[queue->nr - 1].data = thing; + } else { + queue->array[0].ctr = queue->insertion_ctr++; + queue->array[0].data = thing; + sift_down_root(queue); + } +} diff --git a/prio-queue.h b/prio-queue.h index 38d032636d..da7fad2f1f 100644 --- a/prio-queue.h +++ b/prio-queue.h @@ -52,6 +52,14 @@ void *prio_queue_get(struct prio_queue *); */ void *prio_queue_peek(struct prio_queue *); +/* + * Replace the "thing" that compares the smallest with a new "thing", + * like prio_queue_get()+prio_queue_put() would do, but in a more + * efficient way. Does the same as prio_queue_put() if the queue is + * empty. + */ +void prio_queue_replace(struct prio_queue *queue, void *thing); + void clear_prio_queue(struct prio_queue *); /* Reverse the LIFO elements */ diff --git a/t/meson.build b/t/meson.build index 1af289425d..660d780dcc 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1116,6 +1116,7 @@ benchmarks = [ 'perf/p1450-fsck.sh', 'perf/p1451-fsck-skip-list.sh', 'perf/p1500-graph-walks.sh', + 'perf/p1501-rev-parse-oneline.sh', 'perf/p2000-sparse-operations.sh', 'perf/p3400-rebase.sh', 'perf/p3404-rebase-interactive.sh', diff --git a/t/perf/p1501-rev-parse-oneline.sh b/t/perf/p1501-rev-parse-oneline.sh new file mode 100755 index 0000000000..538fa9c404 --- /dev/null +++ b/t/perf/p1501-rev-parse-oneline.sh @@ -0,0 +1,71 @@ +#!/bin/sh + +test_description='Test :/ object name notation' + +. ./perf-lib.sh + +test_perf_fresh_repo + +# +# Creates lots of merges to make history traversal costly. In +# particular it creates 2^($max_level-1)-1 2-way merges on top of +# 2^($max_level-1) root commits. E.g., the commit history looks like +# this for a $max_level of 3: +# +# _1_ +# / \ +# 2 3 +# / \ / \ +# 4 5 6 7 +# +# The numbers are the fast-import marks, which also are the commit +# messages. 1 is the HEAD commit and a merge, 2 and 3 are also merges, +# 4-7 are the root commits. +# +build_history () { + local max_level="$1" && + local level="${2:-1}" && + local mark="${3:-1}" && + if test $level -eq $max_level + then + echo "reset refs/heads/master" && + echo "from $ZERO_OID" && + echo "commit refs/heads/master" && + echo "mark :$mark" && + echo "committer C <c@example.com> 1234567890 +0000" && + echo "data <<EOF" && + echo "$mark" && + echo "EOF" + else + local level1=$((level+1)) && + local mark1=$((2*mark)) && + local mark2=$((2*mark+1)) && + build_history $max_level $level1 $mark1 && + build_history $max_level $level1 $mark2 && + echo "commit refs/heads/master" && + echo "mark :$mark" && + echo "committer C <c@example.com> 1234567890 +0000" && + echo "data <<EOF" && + echo "$mark" && + echo "EOF" && + echo "from :$mark1" && + echo "merge :$mark2" + fi +} + +test_expect_success 'setup' ' + build_history 16 | git fast-import && + git log --format="%H %s" --reverse >commits && + sed -n -e "s/ .*$//p" -e "q" <commits >expect && + sed -n -e "s/^.* //p" -e "q" <commits >needle +' + +test_perf "rev-parse :/$(cat needle)" ' + git rev-parse :/$(cat needle) >actual +' + +test_expect_success 'verify result' ' + test_cmp expect actual +' + +test_done diff --git a/t/unit-tests/u-prio-queue.c b/t/unit-tests/u-prio-queue.c index 145e689c9c..63e58114ae 100644 --- a/t/unit-tests/u-prio-queue.c +++ b/t/unit-tests/u-prio-queue.c @@ -13,6 +13,7 @@ static int intcmp(const void *va, const void *vb, void *data UNUSED) #define STACK -3 #define GET -4 #define REVERSE -5 +#define REPLACE -6 static int show(int *v) { @@ -51,6 +52,15 @@ static void test_prio_queue(int *input, size_t input_size, case REVERSE: prio_queue_reverse(&pq); break; + case REPLACE: + peek = prio_queue_peek(&pq); + cl_assert(i + 1 < input_size); + cl_assert(input[i + 1] >= 0); + cl_assert(j < result_size); + cl_assert_equal_i(result[j], show(peek)); + j++; + prio_queue_replace(&pq, &input[++i]); + break; default: prio_queue_put(&pq, &input[i]); break; @@ -81,6 +91,13 @@ void test_prio_queue__empty(void) ((int []){ 1, 2, MISSING, 1, 2, MISSING })); } +void test_prio_queue__replace(void) +{ + TEST_INPUT(((int []){ REPLACE, 6, 2, 4, REPLACE, 5, 7, GET, + REPLACE, 1, DUMP }), + ((int []){ MISSING, 2, 4, 5, 1, 6, 7 })); +} + void test_prio_queue__stack(void) { TEST_INPUT(((int []){ STACK, 8, 1, 5, 4, 6, 2, 3, DUMP }), @@ -92,3 +109,9 @@ void test_prio_queue__reverse_stack(void) TEST_INPUT(((int []){ STACK, 1, 2, 3, 4, 5, 6, REVERSE, DUMP }), ((int []){ 1, 2, 3, 4, 5, 6 })); } + +void test_prio_queue__replace_stack(void) +{ + TEST_INPUT(((int []){ STACK, 8, 1, 5, REPLACE, 4, 6, 2, 3, DUMP }), + ((int []){ 5, 3, 2, 6, 4, 1, 8 })); +} @@ -14,6 +14,7 @@ #include "blob.h" #include "refs.h" #include "progress.h" +#include "prio-queue.h" static struct object_id current_commit_oid; @@ -78,7 +79,7 @@ static int process_tree(struct walker *walker, struct tree *tree) #define SEEN (1U << 1) #define TO_SCAN (1U << 2) -static struct commit_list *complete = NULL; +static struct prio_queue complete = { compare_commits_by_commit_date }; static int process_commit(struct walker *walker, struct commit *commit) { @@ -87,7 +88,10 @@ static int process_commit(struct walker *walker, struct commit *commit) if (repo_parse_commit(the_repository, commit)) return -1; - while (complete && complete->item->date >= commit->date) { + while (complete.nr) { + struct commit *item = prio_queue_peek(&complete); + if (item->date < commit->date) + break; pop_most_recent_commit(&complete, COMPLETE); } @@ -233,7 +237,7 @@ static int mark_complete(const char *path UNUSED, if (commit) { commit->object.flags |= COMPLETE; - commit_list_insert(commit, &complete); + prio_queue_put(&complete, commit); } return 0; } @@ -302,7 +306,6 @@ int walker_fetch(struct walker *walker, int targets, char **target, if (!walker->get_recover) { refs_for_each_ref(get_main_ref_store(the_repository), mark_complete, NULL); - commit_list_sort_by_date(&complete); } for (i = 0; i < targets; i++) { |