diff options
Diffstat (limited to 'pack-bitmap.c')
-rw-r--r-- | pack-bitmap.c | 391 |
1 files changed, 338 insertions, 53 deletions
diff --git a/pack-bitmap.c b/pack-bitmap.c index 36134222d7..9a208abc1f 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1,5 +1,6 @@ #include "cache.h" #include "commit.h" +#include "strbuf.h" #include "tag.h" #include "diff.h" #include "revision.h" @@ -83,6 +84,12 @@ struct bitmap_index { const unsigned char *checksum; /* + * If not NULL, this point into the commit table extension + * (within the memory mapped region `map`). + */ + unsigned char *table_lookup; + + /* * Extended index. * * When trying to perform bitmap operations with objects that are not @@ -138,7 +145,7 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) index->map_size - index->map_pos); if (bitmap_size < 0) { - error("Failed to load bitmap index (corrupted?)"); + error(_("failed to load bitmap index (corrupted?)")); ewah_pool_free(b); return NULL; } @@ -160,14 +167,14 @@ static int load_bitmap_header(struct bitmap_index *index) size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz; if (index->map_size < header_size + the_hash_algo->rawsz) - return error("Corrupted bitmap index (too small)"); + return error(_("corrupted bitmap index (too small)")); if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0) - return error("Corrupted bitmap index file (wrong header)"); + return error(_("corrupted bitmap index file (wrong header)")); index->version = ntohs(header->version); if (index->version != 1) - return error("Unsupported version for bitmap index file (%d)", index->version); + return error(_("unsupported version '%d' for bitmap index file"), index->version); /* Parse known bitmap format options */ { @@ -176,15 +183,25 @@ static int load_bitmap_header(struct bitmap_index *index) unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz; if ((flags & BITMAP_OPT_FULL_DAG) == 0) - return error("Unsupported options for bitmap index file " + BUG("unsupported options for bitmap index file " "(Git requires BITMAP_OPT_FULL_DAG)"); if (flags & BITMAP_OPT_HASH_CACHE) { if (cache_size > index_end - index->map - header_size) - return error("corrupted bitmap index file (too short to fit hash cache)"); + return error(_("corrupted bitmap index file (too short to fit hash cache)")); index->hashes = (void *)(index_end - cache_size); index_end -= cache_size; } + + if (flags & BITMAP_OPT_LOOKUP_TABLE) { + size_t table_size = st_mult(ntohl(header->entry_count), + BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH); + if (table_size > index_end - index->map - header_size) + return error(_("corrupted bitmap index file (too short to fit lookup table)")); + if (git_env_bool("GIT_TEST_READ_COMMIT_TABLE", 1)) + index->table_lookup = (void *)(index_end - table_size); + index_end -= table_size; + } } index->entry_count = ntohl(header->entry_count); @@ -211,11 +228,13 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret); - /* a 0 return code means the insertion succeeded with no changes, - * because the SHA1 already existed on the map. this is bad, there - * shouldn't be duplicated commits in the index */ + /* + * A 0 return code means the insertion succeeded with no changes, + * because the SHA1 already existed on the map. This is bad, there + * shouldn't be duplicated commits in the index. + */ if (ret == 0) { - error("Duplicate entry in bitmap index: %s", oid_to_hex(oid)); + error(_("duplicate entry in bitmap index: '%s'"), oid_to_hex(oid)); return NULL; } @@ -259,14 +278,14 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) struct object_id oid; if (index->map_size - index->map_pos < 6) - return error("corrupt ewah bitmap: truncated header for entry %d", i); + return error(_("corrupt ewah bitmap: truncated header for entry %d"), i); commit_idx_pos = read_be32(index->map, &index->map_pos); xor_offset = read_u8(index->map, &index->map_pos); flags = read_u8(index->map, &index->map_pos); if (nth_bitmap_object_oid(index, &oid, commit_idx_pos) < 0) - return error("corrupt ewah bitmap: commit index %u out of range", + return error(_("corrupt ewah bitmap: commit index %u out of range"), (unsigned)commit_idx_pos); bitmap = read_bitmap_1(index); @@ -274,13 +293,13 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) return -1; if (xor_offset > MAX_XOR_OFFSET || xor_offset > i) - return error("Corrupted bitmap pack index"); + return error(_("corrupted bitmap pack index")); if (xor_offset > 0) { xor_bitmap = recent_bitmaps[(i - xor_offset) % MAX_XOR_OFFSET]; if (!xor_bitmap) - return error("Invalid XOR offset in bitmap pack index"); + return error(_("invalid XOR offset in bitmap pack index")); } recent_bitmaps[i % MAX_XOR_OFFSET] = store_bitmap( @@ -313,17 +332,21 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, struct multi_pack_index *midx) { struct stat st; - char *idx_name = midx_bitmap_filename(midx); - int fd = git_open(idx_name); + char *bitmap_name = midx_bitmap_filename(midx); + int fd = git_open(bitmap_name); uint32_t i; struct packed_git *preferred; - free(idx_name); - - if (fd < 0) + if (fd < 0) { + if (errno != ENOENT) + warning_errno("cannot open '%s'", bitmap_name); + free(bitmap_name); return -1; + } + free(bitmap_name); if (fstat(fd, &st)) { + error_errno(_("cannot fstat bitmap file")); close(fd); return -1; } @@ -332,7 +355,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, struct strbuf buf = STRBUF_INIT; get_midx_filename(&buf, midx->object_dir); /* ignore extra bitmap file; we can only handle one */ - warning("ignoring extra bitmap file: %s", buf.buf); + warning(_("ignoring extra bitmap file: '%s'"), buf.buf); close(fd); strbuf_release(&buf); return -1; @@ -348,8 +371,10 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (load_bitmap_header(bitmap_git) < 0) goto cleanup; - if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) + if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) { + error(_("checksum doesn't match in MIDX and bitmap")); goto cleanup; + } if (load_midx_revindex(bitmap_git->midx) < 0) { warning(_("multi-pack bitmap is missing required reverse index")); @@ -384,26 +409,31 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git { int fd; struct stat st; - char *idx_name; + char *bitmap_name; if (open_pack_index(packfile)) return -1; - idx_name = pack_bitmap_filename(packfile); - fd = git_open(idx_name); - free(idx_name); + bitmap_name = pack_bitmap_filename(packfile); + fd = git_open(bitmap_name); - if (fd < 0) + if (fd < 0) { + if (errno != ENOENT) + warning_errno("cannot open '%s'", bitmap_name); + free(bitmap_name); return -1; + } + free(bitmap_name); if (fstat(fd, &st)) { + error_errno(_("cannot fstat bitmap file")); close(fd); return -1; } if (bitmap_git->pack || bitmap_git->midx) { /* ignore extra bitmap file; we can only handle one */ - warning("ignoring extra bitmap file: %s", packfile->pack_name); + warning(_("ignoring extra bitmap file: '%s'"), packfile->pack_name); close(fd); return -1; } @@ -470,7 +500,7 @@ static int load_bitmap(struct bitmap_index *bitmap_git) !(bitmap_git->tags = read_bitmap_1(bitmap_git))) goto failed; - if (load_bitmap_entries_v1(bitmap_git) < 0) + if (!bitmap_git->table_lookup && load_bitmap_entries_v1(bitmap_git) < 0) goto failed; return 0; @@ -508,15 +538,16 @@ static int open_pack_bitmap(struct repository *r, static int open_midx_bitmap(struct repository *r, struct bitmap_index *bitmap_git) { + int ret = -1; struct multi_pack_index *midx; assert(!bitmap_git->map); for (midx = get_multi_pack_index(r); midx; midx = midx->next) { if (!open_midx_bitmap_1(bitmap_git, midx)) - return 0; + ret = 0; } - return -1; + return ret; } static int open_bitmap(struct repository *r, @@ -557,13 +588,256 @@ struct include_data { struct bitmap *seen; }; +struct bitmap_lookup_table_triplet { + uint32_t commit_pos; + uint64_t offset; + uint32_t xor_row; +}; + +struct bitmap_lookup_table_xor_item { + struct object_id oid; + uint64_t offset; +}; + +/* + * Given a `triplet` struct pointer and pointer `p`, this + * function reads the triplet beginning at `p` into the struct. + * Note that this function assumes that there is enough memory + * left for filling the `triplet` struct from `p`. + */ +static int bitmap_lookup_table_get_triplet_by_pointer(struct bitmap_lookup_table_triplet *triplet, + const unsigned char *p) +{ + if (!triplet) + return -1; + + triplet->commit_pos = get_be32(p); + p += sizeof(uint32_t); + triplet->offset = get_be64(p); + p += sizeof(uint64_t); + triplet->xor_row = get_be32(p); + return 0; +} + +/* + * This function gets the raw triplet from `row`'th row in the + * lookup table and fills that data to the `triplet`. + */ +static int bitmap_lookup_table_get_triplet(struct bitmap_index *bitmap_git, + uint32_t pos, + struct bitmap_lookup_table_triplet *triplet) +{ + unsigned char *p = NULL; + if (pos >= bitmap_git->entry_count) + return error(_("corrupt bitmap lookup table: triplet position out of index")); + + p = bitmap_git->table_lookup + st_mult(pos, BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH); + + return bitmap_lookup_table_get_triplet_by_pointer(triplet, p); +} + +/* + * Searches for a matching triplet. `commit_pos` is a pointer + * to the wanted commit position value. `table_entry` points to + * a triplet in lookup table. The first 4 bytes of each + * triplet (pointed by `table_entry`) are compared with `*commit_pos`. + */ +static int triplet_cmp(const void *commit_pos, const void *table_entry) +{ + + uint32_t a = *(uint32_t *)commit_pos; + uint32_t b = get_be32(table_entry); + if (a > b) + return 1; + else if (a < b) + return -1; + + return 0; +} + +static uint32_t bitmap_bsearch_pos(struct bitmap_index *bitmap_git, + struct object_id *oid, + uint32_t *result) +{ + int found; + + if (bitmap_is_midx(bitmap_git)) + found = bsearch_midx(oid, bitmap_git->midx, result); + else + found = bsearch_pack(oid, bitmap_git->pack, result); + + return found; +} + +/* + * `bsearch_triplet_by_pos` function searches for the raw triplet + * having commit position same as `commit_pos` and fills `triplet` + * object from the raw triplet. Returns 1 on success and 0 on + * failure. + */ +static int bitmap_bsearch_triplet_by_pos(uint32_t commit_pos, + struct bitmap_index *bitmap_git, + struct bitmap_lookup_table_triplet *triplet) +{ + unsigned char *p = bsearch(&commit_pos, bitmap_git->table_lookup, bitmap_git->entry_count, + BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH, triplet_cmp); + + if (!p) + return -1; + + return bitmap_lookup_table_get_triplet_by_pointer(triplet, p); +} + +static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_git, + struct commit *commit) +{ + uint32_t commit_pos, xor_row; + uint64_t offset; + int flags; + struct bitmap_lookup_table_triplet triplet; + struct object_id *oid = &commit->object.oid; + struct ewah_bitmap *bitmap; + struct stored_bitmap *xor_bitmap = NULL; + const int bitmap_header_size = 6; + static struct bitmap_lookup_table_xor_item *xor_items = NULL; + static size_t xor_items_nr = 0, xor_items_alloc = 0; + static int is_corrupt = 0; + int xor_flags; + khiter_t hash_pos; + struct bitmap_lookup_table_xor_item *xor_item; + + if (is_corrupt) + return NULL; + + if (!bitmap_bsearch_pos(bitmap_git, oid, &commit_pos)) + return NULL; + + if (bitmap_bsearch_triplet_by_pos(commit_pos, bitmap_git, &triplet) < 0) + return NULL; + + xor_items_nr = 0; + offset = triplet.offset; + xor_row = triplet.xor_row; + + while (xor_row != 0xffffffff) { + ALLOC_GROW(xor_items, xor_items_nr + 1, xor_items_alloc); + + if (xor_items_nr + 1 >= bitmap_git->entry_count) { + error(_("corrupt bitmap lookup table: xor chain exceed entry count")); + goto corrupt; + } + + if (bitmap_lookup_table_get_triplet(bitmap_git, xor_row, &triplet) < 0) + goto corrupt; + + xor_item = &xor_items[xor_items_nr]; + xor_item->offset = triplet.offset; + + if (nth_bitmap_object_oid(bitmap_git, &xor_item->oid, triplet.commit_pos) < 0) { + error(_("corrupt bitmap lookup table: commit index %u out of range"), + triplet.commit_pos); + goto corrupt; + } + + hash_pos = kh_get_oid_map(bitmap_git->bitmaps, xor_item->oid); + + /* + * If desired bitmap is already stored, we don't need + * to iterate further. Because we know that bitmaps + * that are needed to be parsed to parse this bitmap + * has already been stored. So, assign this stored bitmap + * to the xor_bitmap. + */ + if (hash_pos < kh_end(bitmap_git->bitmaps) && + (xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos))) + break; + xor_items_nr++; + xor_row = triplet.xor_row; + } + + while (xor_items_nr) { + xor_item = &xor_items[xor_items_nr - 1]; + bitmap_git->map_pos = xor_item->offset; + if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) { + error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""), + oid_to_hex(&xor_item->oid)); + goto corrupt; + } + + bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t); + xor_flags = read_u8(bitmap_git->map, &bitmap_git->map_pos); + bitmap = read_bitmap_1(bitmap_git); + + if (!bitmap) + goto corrupt; + + xor_bitmap = store_bitmap(bitmap_git, bitmap, &xor_item->oid, xor_bitmap, xor_flags); + xor_items_nr--; + } + + bitmap_git->map_pos = offset; + if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) { + error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""), + oid_to_hex(oid)); + goto corrupt; + } + + /* + * Don't bother reading the commit's index position or its xor + * offset: + * + * - The commit's index position is irrelevant to us, since + * load_bitmap_entries_v1 only uses it to learn the object + * id which is used to compute the hashmap's key. We already + * have an object id, so no need to look it up again. + * + * - The xor_offset is unusable for us, since it specifies how + * many entries previous to ours we should look at. This + * makes sense when reading the bitmaps sequentially (as in + * load_bitmap_entries_v1()), since we can keep track of + * each bitmap as we read them. + * + * But it can't work for us, since the bitmap's don't have a + * fixed size. So we learn the position of the xor'd bitmap + * from the commit table (and resolve it to a bitmap in the + * above if-statement). + * + * Instead, we can skip ahead and immediately read the flags and + * ewah bitmap. + */ + bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t); + flags = read_u8(bitmap_git->map, &bitmap_git->map_pos); + bitmap = read_bitmap_1(bitmap_git); + + if (!bitmap) + goto corrupt; + + return store_bitmap(bitmap_git, bitmap, oid, xor_bitmap, flags); + +corrupt: + free(xor_items); + is_corrupt = 1; + return NULL; +} + struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit) { khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, commit->object.oid); - if (hash_pos >= kh_end(bitmap_git->bitmaps)) - return NULL; + if (hash_pos >= kh_end(bitmap_git->bitmaps)) { + struct stored_bitmap *bitmap = NULL; + if (!bitmap_git->table_lookup) + return NULL; + + trace2_region_enter("pack-bitmap", "reading_lookup_table", the_repository); + /* NEEDSWORK: cache misses aren't recorded */ + bitmap = lazy_bitmap_for_commit(bitmap_git, commit); + trace2_region_leave("pack-bitmap", "reading_lookup_table", the_repository); + if (!bitmap) + return NULL; + return lookup_stored_bitmap(bitmap); + } return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos)); } @@ -831,7 +1105,7 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, revs->include_check_data = &incdata; if (prepare_revision_walk(revs)) - die("revision walk setup failed"); + die(_("revision walk setup failed")); show_data.bitmap_git = bitmap_git; show_data.base = base; @@ -1640,15 +1914,15 @@ static void test_bitmap_type(struct bitmap_test_data *tdata, } if (bitmap_type == OBJ_NONE) - die("object %s not found in type bitmaps", + die(_("object '%s' not found in type bitmaps"), oid_to_hex(&obj->oid)); if (bitmaps_nr > 1) - die("object %s does not have a unique type", + die(_("object '%s' does not have a unique type"), oid_to_hex(&obj->oid)); if (bitmap_type != obj->type) - die("object %s: real type %s, expected: %s", + die(_("object '%s': real type '%s', expected: '%s'"), oid_to_hex(&obj->oid), type_name(obj->type), type_name(bitmap_type)); @@ -1662,7 +1936,7 @@ static void test_show_object(struct object *object, const char *name, bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid); if (bitmap_pos < 0) - die("Object not in bitmap: %s\n", oid_to_hex(&object->oid)); + die(_("object not in bitmap: '%s'"), oid_to_hex(&object->oid)); test_bitmap_type(tdata, object, bitmap_pos); bitmap_set(tdata->base, bitmap_pos); @@ -1677,7 +1951,7 @@ static void test_show_commit(struct commit *commit, void *data) bitmap_pos = bitmap_position(tdata->bitmap_git, &commit->object.oid); if (bitmap_pos < 0) - die("Object not in bitmap: %s\n", oid_to_hex(&commit->object.oid)); + die(_("object not in bitmap: '%s'"), oid_to_hex(&commit->object.oid)); test_bitmap_type(tdata, &commit->object, bitmap_pos); bitmap_set(tdata->base, bitmap_pos); @@ -1694,26 +1968,28 @@ void test_bitmap_walk(struct rev_info *revs) struct ewah_bitmap *bm; if (!(bitmap_git = prepare_bitmap_git(revs->repo))) - die("failed to load bitmap indexes"); + die(_("failed to load bitmap indexes")); if (revs->pending.nr != 1) - die("you must specify exactly one commit to test"); + die(_("you must specify exactly one commit to test")); - fprintf(stderr, "Bitmap v%d test (%d entries loaded)\n", - bitmap_git->version, bitmap_git->entry_count); + fprintf_ln(stderr, "Bitmap v%d test (%d entries%s)", + bitmap_git->version, + bitmap_git->entry_count, + bitmap_git->table_lookup ? "" : " loaded"); root = revs->pending.objects[0].item; bm = bitmap_for_commit(bitmap_git, (struct commit *)root); if (bm) { - fprintf(stderr, "Found bitmap for %s. %d bits / %08x checksum\n", + fprintf_ln(stderr, "Found bitmap for '%s'. %d bits / %08x checksum", oid_to_hex(&root->oid), (int)bm->bit_size, ewah_checksum(bm)); result = ewah_to_bitmap(bm); } if (!result) - die("Commit %s doesn't have an indexed bitmap", oid_to_hex(&root->oid)); + die(_("commit '%s' doesn't have an indexed bitmap"), oid_to_hex(&root->oid)); revs->tag_objects = 1; revs->tree_objects = 1; @@ -1722,7 +1998,7 @@ void test_bitmap_walk(struct rev_info *revs) result_popcnt = bitmap_popcount(result); if (prepare_revision_walk(revs)) - die("revision walk setup failed"); + die(_("revision walk setup failed")); tdata.bitmap_git = bitmap_git; tdata.base = bitmap_new(); @@ -1738,9 +2014,9 @@ void test_bitmap_walk(struct rev_info *revs) stop_progress(&tdata.prg); if (bitmap_equals(result, tdata.base)) - fprintf(stderr, "OK!\n"); + fprintf_ln(stderr, "OK!"); else - die("mismatch in bitmap results"); + die(_("mismatch in bitmap results")); bitmap_free(result); bitmap_free(tdata.base); @@ -1753,15 +2029,24 @@ void test_bitmap_walk(struct rev_info *revs) int test_bitmap_commits(struct repository *r) { - struct bitmap_index *bitmap_git = prepare_bitmap_git(r); struct object_id oid; MAYBE_UNUSED void *value; + struct bitmap_index *bitmap_git = prepare_bitmap_git(r); if (!bitmap_git) - die("failed to load bitmap indexes"); + die(_("failed to load bitmap indexes")); + + /* + * As this function is only used to print bitmap selected + * commits, we don't have to read the commit table. + */ + if (bitmap_git->table_lookup) { + if (load_bitmap_entries_v1(bitmap_git) < 0) + die(_("failed to load bitmap indexes")); + } kh_foreach(bitmap_git->bitmaps, oid, value, { - printf("%s\n", oid_to_hex(&oid)); + printf_ln("%s", oid_to_hex(&oid)); }); free_bitmap_index(bitmap_git); @@ -1786,7 +2071,7 @@ int test_bitmap_hashes(struct repository *r) nth_bitmap_object_oid(bitmap_git, &oid, index_pos); - printf("%s %"PRIu32"\n", + printf_ln("%s %"PRIu32"", oid_to_hex(&oid), get_be32(bitmap_git->hashes + index_pos)); } @@ -1948,7 +2233,7 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, struct object_id oid; nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos); - die(_("could not find %s in pack %s at offset %"PRIuMAX), + die(_("could not find '%s' in pack '%s' at offset %"PRIuMAX), oid_to_hex(&oid), pack->pack_name, (uintmax_t)offset); @@ -1984,7 +2269,7 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) continue; if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) - die(_("unable to get disk usage of %s"), + die(_("unable to get disk usage of '%s'"), oid_to_hex(&obj->oid)); total += object_size; |