From 7a7d992d0dbd78cf1fc477cf1e1caf61833b3e41 Mon Sep 17 00:00:00 2001 From: Martin Ågren Date: Thu, 31 Dec 2020 12:56:22 +0100 Subject: sha1-lookup: rename `sha1_pos()` as `hash_pos()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename this function to reflect that we're not just able to handle SHA-1 these days. There are a few instances of "sha1" left in sha1-lookup.[ch] after this, but those will be addressed in the next commit. Signed-off-by: Martin Ågren Reviewed-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index fe1fa3dc41..0b7bace022 100644 --- a/commit.c +++ b/commit.c @@ -113,7 +113,7 @@ static const unsigned char *commit_graft_sha1_access(size_t index, void *table) int commit_graft_pos(struct repository *r, const unsigned char *sha1) { - return sha1_pos(sha1, r->parsed_objects->grafts, + return hash_pos(sha1, r->parsed_objects->grafts, r->parsed_objects->grafts_nr, commit_graft_sha1_access); } -- cgit v1.2.3 From bc626927575cea80b8bc5fd0dbb6c6439e34e606 Mon Sep 17 00:00:00 2001 From: Martin Ågren Date: Thu, 31 Dec 2020 12:56:23 +0100 Subject: hash-lookup: rename from sha1-lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change all remnants of "sha1" in hash-lookup.c and .h and rename them to reflect that we're not just able to handle SHA-1 these days. Signed-off-by: Martin Ågren Reviewed-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Makefile | 2 +- bisect.c | 2 +- builtin/name-rev.c | 2 +- commit-graph.c | 2 +- commit.c | 2 +- hash-lookup.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++ hash-lookup.h | 32 +++++++++++++ midx.c | 2 +- object-file.c | 2 +- oid-array.c | 2 +- pack-bitmap-write.c | 2 +- packfile.c | 2 +- patch-ids.c | 2 +- rerere.c | 2 +- sha1-lookup.c | 129 ---------------------------------------------------- sha1-lookup.h | 32 ------------- 16 files changed, 173 insertions(+), 173 deletions(-) create mode 100644 hash-lookup.c create mode 100644 hash-lookup.h delete mode 100644 sha1-lookup.c delete mode 100644 sha1-lookup.h (limited to 'commit.c') diff --git a/Makefile b/Makefile index 224a1c6940..7a141facc7 100644 --- a/Makefile +++ b/Makefile @@ -901,6 +901,7 @@ LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o +LIB_OBJS += hash-lookup.o LIB_OBJS += hashmap.o LIB_OBJS += help.o LIB_OBJS += hex.o @@ -995,7 +996,6 @@ LIB_OBJS += sequencer.o LIB_OBJS += serve.o LIB_OBJS += server-info.o LIB_OBJS += setup.o -LIB_OBJS += sha1-lookup.o LIB_OBJS += shallow.o LIB_OBJS += sideband.o LIB_OBJS += sigchain.o diff --git a/bisect.c b/bisect.c index d8c2c8f7a7..75ea0eb57f 100644 --- a/bisect.c +++ b/bisect.c @@ -6,7 +6,7 @@ #include "refs.h" #include "list-objects.h" #include "quote.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "run-command.h" #include "log-tree.h" #include "bisect.h" diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 4939ceb2e5..3fe71a8c01 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -7,7 +7,7 @@ #include "refs.h" #include "parse-options.h" #include "prio-queue.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "commit-slab.h" /* diff --git a/commit-graph.c b/commit-graph.c index c672feee91..e9124d4a41 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -7,7 +7,7 @@ #include "object.h" #include "refs.h" #include "revision.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "commit-graph.h" #include "object-store.h" #include "alloc.h" diff --git a/commit.c b/commit.c index 0b7bace022..cb119ebdf2 100644 --- a/commit.c +++ b/commit.c @@ -14,7 +14,7 @@ #include "mergesort.h" #include "commit-slab.h" #include "prio-queue.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "wt-status.h" #include "advice.h" #include "refs.h" diff --git a/hash-lookup.c b/hash-lookup.c new file mode 100644 index 0000000000..1191856a32 --- /dev/null +++ b/hash-lookup.c @@ -0,0 +1,129 @@ +#include "cache.h" +#include "hash-lookup.h" + +static uint32_t take2(const unsigned char *hash) +{ + return ((hash[0] << 8) | hash[1]); +} + +/* + * Conventional binary search loop looks like this: + * + * do { + * int mi = lo + (hi - lo) / 2; + * int cmp = "entry pointed at by mi" minus "target"; + * if (!cmp) + * return (mi is the wanted one) + * if (cmp > 0) + * hi = mi; "mi is larger than target" + * else + * lo = mi+1; "mi is smaller than target" + * } while (lo < hi); + * + * The invariants are: + * + * - When entering the loop, lo points at a slot that is never + * above the target (it could be at the target), hi points at a + * slot that is guaranteed to be above the target (it can never + * be at the target). + * + * - We find a point 'mi' between lo and hi (mi could be the same + * as lo, but never can be the same as hi), and check if it hits + * the target. There are three cases: + * + * - if it is a hit, we are happy. + * + * - if it is strictly higher than the target, we update hi with + * it. + * + * - if it is strictly lower than the target, we update lo to be + * one slot after it, because we allow lo to be at the target. + * + * When choosing 'mi', we do not have to take the "middle" but + * anywhere in between lo and hi, as long as lo <= mi < hi is + * satisfied. When we somehow know that the distance between the + * target and lo is much shorter than the target and hi, we could + * pick mi that is much closer to lo than the midway. + */ +/* + * The table should contain "nr" elements. + * The hash of element i (between 0 and nr - 1) should be returned + * by "fn(i, table)". + */ +int hash_pos(const unsigned char *hash, void *table, size_t nr, + hash_access_fn fn) +{ + size_t hi = nr; + size_t lo = 0; + size_t mi = 0; + + if (!nr) + return -1; + + if (nr != 1) { + size_t lov, hiv, miv, ofs; + + for (ofs = 0; ofs < the_hash_algo->rawsz - 2; ofs += 2) { + lov = take2(fn(0, table) + ofs); + hiv = take2(fn(nr - 1, table) + ofs); + miv = take2(hash + ofs); + if (miv < lov) + return -1; + if (hiv < miv) + return index_pos_to_insert_pos(nr); + if (lov != hiv) { + /* + * At this point miv could be equal + * to hiv (but hash could still be higher); + * the invariant of (mi < hi) should be + * kept. + */ + mi = (nr - 1) * (miv - lov) / (hiv - lov); + if (lo <= mi && mi < hi) + break; + BUG("assertion failed in binary search"); + } + } + } + + do { + int cmp; + cmp = hashcmp(fn(mi, table), hash); + if (!cmp) + return mi; + if (cmp > 0) + hi = mi; + else + lo = mi + 1; + mi = lo + (hi - lo) / 2; + } while (lo < hi); + return index_pos_to_insert_pos(lo); +} + +int bsearch_hash(const unsigned char *hash, const uint32_t *fanout_nbo, + const unsigned char *table, size_t stride, uint32_t *result) +{ + uint32_t hi, lo; + + hi = ntohl(fanout_nbo[*hash]); + lo = ((*hash == 0x0) ? 0 : ntohl(fanout_nbo[*hash - 1])); + + while (lo < hi) { + unsigned mi = lo + (hi - lo) / 2; + int cmp = hashcmp(table + mi * stride, hash); + + if (!cmp) { + if (result) + *result = mi; + return 1; + } + if (cmp > 0) + hi = mi; + else + lo = mi + 1; + } + + if (result) + *result = lo; + return 0; +} diff --git a/hash-lookup.h b/hash-lookup.h new file mode 100644 index 0000000000..5d476dec72 --- /dev/null +++ b/hash-lookup.h @@ -0,0 +1,32 @@ +#ifndef HASH_LOOKUP_H +#define HASH_LOOKUP_H + +typedef const unsigned char *hash_access_fn(size_t index, void *table); + +int hash_pos(const unsigned char *hash, + void *table, + size_t nr, + hash_access_fn fn); + +/* + * Searches for hash in table, using the given fanout table to determine the + * interval to search, then using binary search. Returns 1 if found, 0 if not. + * + * Takes the following parameters: + * + * - hash: the hash to search for + * - fanout_nbo: a 256-element array of NETWORK-order 32-bit integers; the + * integer at position i represents the number of elements in table whose + * first byte is less than or equal to i + * - table: a sorted list of hashes with optional extra information in between + * - stride: distance between two consecutive elements in table (should be + * GIT_MAX_RAWSZ or greater) + * - result: if not NULL, this function stores the element index of the + * position found (if the search is successful) or the index of the least + * element that is greater than hash (if the search is not successful) + * + * This function does not verify the validity of the fanout table. + */ +int bsearch_hash(const unsigned char *hash, const uint32_t *fanout_nbo, + const unsigned char *table, size_t stride, uint32_t *result); +#endif diff --git a/midx.c b/midx.c index 79c282b070..f9d9b832bb 100644 --- a/midx.c +++ b/midx.c @@ -5,7 +5,7 @@ #include "lockfile.h" #include "packfile.h" #include "object-store.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "midx.h" #include "progress.h" #include "trace2.h" diff --git a/object-file.c b/object-file.c index 3508598d97..5bcfde8471 100644 --- a/object-file.c +++ b/object-file.c @@ -20,7 +20,7 @@ #include "tree-walk.h" #include "refs.h" #include "pack-revindex.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "bulk-checkin.h" #include "repository.h" #include "replace-object.h" diff --git a/oid-array.c b/oid-array.c index fb4c3dd795..889b311f22 100644 --- a/oid-array.c +++ b/oid-array.c @@ -1,6 +1,6 @@ #include "cache.h" #include "oid-array.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" void oid_array_append(struct oid_array *array, const struct object_id *oid) { diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 27ece05ec7..ae6d1475f9 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -9,7 +9,7 @@ #include "pack-revindex.h" #include "pack.h" #include "pack-bitmap.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "pack-objects.h" #include "commit-reach.h" diff --git a/packfile.c b/packfile.c index 86f5c8dbf6..62d92e0c7c 100644 --- a/packfile.c +++ b/packfile.c @@ -7,7 +7,7 @@ #include "packfile.h" #include "delta.h" #include "streaming.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "commit.h" #include "object.h" #include "tag.h" diff --git a/patch-ids.c b/patch-ids.c index 21973e4933..cf5e8045b7 100644 --- a/patch-ids.c +++ b/patch-ids.c @@ -1,7 +1,7 @@ #include "cache.h" #include "diff.h" #include "commit.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "patch-ids.h" static int patch_id_defined(struct commit *commit) diff --git a/rerere.c b/rerere.c index 9fc76eb756..d6928c1b5c 100644 --- a/rerere.c +++ b/rerere.c @@ -10,7 +10,7 @@ #include "attr.h" #include "pathspec.h" #include "object-store.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #define RESOLVED 0 #define PUNTED 1 diff --git a/sha1-lookup.c b/sha1-lookup.c deleted file mode 100644 index 45489edfe8..0000000000 --- a/sha1-lookup.c +++ /dev/null @@ -1,129 +0,0 @@ -#include "cache.h" -#include "sha1-lookup.h" - -static uint32_t take2(const unsigned char *sha1) -{ - return ((sha1[0] << 8) | sha1[1]); -} - -/* - * Conventional binary search loop looks like this: - * - * do { - * int mi = lo + (hi - lo) / 2; - * int cmp = "entry pointed at by mi" minus "target"; - * if (!cmp) - * return (mi is the wanted one) - * if (cmp > 0) - * hi = mi; "mi is larger than target" - * else - * lo = mi+1; "mi is smaller than target" - * } while (lo < hi); - * - * The invariants are: - * - * - When entering the loop, lo points at a slot that is never - * above the target (it could be at the target), hi points at a - * slot that is guaranteed to be above the target (it can never - * be at the target). - * - * - We find a point 'mi' between lo and hi (mi could be the same - * as lo, but never can be the same as hi), and check if it hits - * the target. There are three cases: - * - * - if it is a hit, we are happy. - * - * - if it is strictly higher than the target, we update hi with - * it. - * - * - if it is strictly lower than the target, we update lo to be - * one slot after it, because we allow lo to be at the target. - * - * When choosing 'mi', we do not have to take the "middle" but - * anywhere in between lo and hi, as long as lo <= mi < hi is - * satisfied. When we somehow know that the distance between the - * target and lo is much shorter than the target and hi, we could - * pick mi that is much closer to lo than the midway. - */ -/* - * The table should contain "nr" elements. - * The hash of element i (between 0 and nr - 1) should be returned - * by "fn(i, table)". - */ -int hash_pos(const unsigned char *hash, void *table, size_t nr, - hash_access_fn fn) -{ - size_t hi = nr; - size_t lo = 0; - size_t mi = 0; - - if (!nr) - return -1; - - if (nr != 1) { - size_t lov, hiv, miv, ofs; - - for (ofs = 0; ofs < the_hash_algo->rawsz - 2; ofs += 2) { - lov = take2(fn(0, table) + ofs); - hiv = take2(fn(nr - 1, table) + ofs); - miv = take2(hash + ofs); - if (miv < lov) - return -1; - if (hiv < miv) - return index_pos_to_insert_pos(nr); - if (lov != hiv) { - /* - * At this point miv could be equal - * to hiv (but hash could still be higher); - * the invariant of (mi < hi) should be - * kept. - */ - mi = (nr - 1) * (miv - lov) / (hiv - lov); - if (lo <= mi && mi < hi) - break; - BUG("assertion failed in binary search"); - } - } - } - - do { - int cmp; - cmp = hashcmp(fn(mi, table), hash); - if (!cmp) - return mi; - if (cmp > 0) - hi = mi; - else - lo = mi + 1; - mi = lo + (hi - lo) / 2; - } while (lo < hi); - return index_pos_to_insert_pos(lo); -} - -int bsearch_hash(const unsigned char *sha1, const uint32_t *fanout_nbo, - const unsigned char *table, size_t stride, uint32_t *result) -{ - uint32_t hi, lo; - - hi = ntohl(fanout_nbo[*sha1]); - lo = ((*sha1 == 0x0) ? 0 : ntohl(fanout_nbo[*sha1 - 1])); - - while (lo < hi) { - unsigned mi = lo + (hi - lo) / 2; - int cmp = hashcmp(table + mi * stride, sha1); - - if (!cmp) { - if (result) - *result = mi; - return 1; - } - if (cmp > 0) - hi = mi; - else - lo = mi + 1; - } - - if (result) - *result = lo; - return 0; -} diff --git a/sha1-lookup.h b/sha1-lookup.h deleted file mode 100644 index 79973d4785..0000000000 --- a/sha1-lookup.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef SHA1_LOOKUP_H -#define SHA1_LOOKUP_H - -typedef const unsigned char *hash_access_fn(size_t index, void *table); - -int hash_pos(const unsigned char *hash, - void *table, - size_t nr, - hash_access_fn fn); - -/* - * Searches for sha1 in table, using the given fanout table to determine the - * interval to search, then using binary search. Returns 1 if found, 0 if not. - * - * Takes the following parameters: - * - * - sha1: the hash to search for - * - fanout_nbo: a 256-element array of NETWORK-order 32-bit integers; the - * integer at position i represents the number of elements in table whose - * first byte is less than or equal to i - * - table: a sorted list of hashes with optional extra information in between - * - stride: distance between two consecutive elements in table (should be - * GIT_MAX_RAWSZ or greater) - * - result: if not NULL, this function stores the element index of the - * position found (if the search is successful) or the index of the least - * element that is greater than sha1 (if the search is not successful) - * - * This function does not verify the validity of the fanout table. - */ -int bsearch_hash(const unsigned char *sha1, const uint32_t *fanout_nbo, - const unsigned char *table, size_t stride, uint32_t *result); -#endif -- cgit v1.2.3