diff options
Diffstat (limited to 'pack-bitmap.c')
-rw-r--r-- | pack-bitmap.c | 746 |
1 files changed, 619 insertions, 127 deletions
diff --git a/pack-bitmap.c b/pack-bitmap.c index 878f378016..683f467051 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "git-compat-util.h" #include "commit.h" #include "gettext.h" @@ -20,6 +22,7 @@ #include "list-objects-filter-options.h" #include "midx.h" #include "config.h" +#include "pseudo-merge.h" /* * An entry on the bitmap index, representing the bitmap for a given @@ -51,13 +54,6 @@ struct bitmap_index { struct packed_git *pack; struct multi_pack_index *midx; - /* - * Mark the first `reuse_objects` in the packfile as reused: - * they will be sent as-is without using them for repacking - * calculations - */ - uint32_t reuse_objects; - /* mmapped buffer of the whole bitmap index */ unsigned char *map; size_t map_size; /* size of the mmaped buffer */ @@ -93,6 +89,9 @@ struct bitmap_index { */ unsigned char *table_lookup; + /* This contains the pseudo-merge cache within 'map' (if found). */ + struct pseudo_merge_map pseudo_merges; + /* * Extended index. * @@ -117,6 +116,13 @@ struct bitmap_index { unsigned int version; }; +static int pseudo_merges_satisfied_nr; +static int pseudo_merges_cascades_nr; +static int existing_bitmaps_hits_nr; +static int existing_bitmaps_misses_nr; +static int roots_with_bitmaps_nr; +static int roots_without_bitmaps_nr; + static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st) { struct ewah_bitmap *parent; @@ -136,17 +142,13 @@ static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st) return composed; } -/* - * Read a bitmap from the current read position on the mmaped - * index, and increase the read position accordingly - */ -static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) +struct ewah_bitmap *read_bitmap(const unsigned char *map, + size_t map_size, size_t *map_pos) { struct ewah_bitmap *b = ewah_pool_new(); - ssize_t bitmap_size = ewah_read_mmap(b, - index->map + index->map_pos, - index->map_size - index->map_pos); + ssize_t bitmap_size = ewah_read_mmap(b, map + *map_pos, + map_size - *map_pos); if (bitmap_size < 0) { error(_("failed to load bitmap index (corrupted?)")); @@ -154,10 +156,20 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) return NULL; } - index->map_pos += bitmap_size; + *map_pos += bitmap_size; + return b; } +/* + * Read a bitmap from the current read position on the mmaped + * index, and increase the read position accordingly + */ +static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) +{ + return read_bitmap(index->map, index->map_size, &index->map_pos); +} + static uint32_t bitmap_num_objects(struct bitmap_index *index) { if (index->midx) @@ -206,6 +218,46 @@ static int load_bitmap_header(struct bitmap_index *index) index->table_lookup = (void *)(index_end - table_size); index_end -= table_size; } + + if (flags & BITMAP_OPT_PSEUDO_MERGES) { + unsigned char *pseudo_merge_ofs; + size_t table_size; + uint32_t i; + + if (sizeof(table_size) > index_end - index->map - header_size) + return error(_("corrupted bitmap index file (too short to fit pseudo-merge table header)")); + + table_size = get_be64(index_end - 8); + if (table_size > index_end - index->map - header_size) + return error(_("corrupted bitmap index file (too short to fit pseudo-merge table)")); + + if (git_env_bool("GIT_TEST_USE_PSEUDO_MERGES", 1)) { + const unsigned char *ext = (index_end - table_size); + + index->pseudo_merges.map = index->map; + index->pseudo_merges.map_size = index->map_size; + index->pseudo_merges.commits = ext + get_be64(index_end - 16); + index->pseudo_merges.commits_nr = get_be32(index_end - 20); + index->pseudo_merges.nr = get_be32(index_end - 24); + + if (st_add(st_mult(index->pseudo_merges.nr, + sizeof(uint64_t)), + 24) > table_size) + return error(_("corrupted bitmap index file, pseudo-merge table too short")); + + CALLOC_ARRAY(index->pseudo_merges.v, + index->pseudo_merges.nr); + + pseudo_merge_ofs = index_end - 24 - + (index->pseudo_merges.nr * sizeof(uint64_t)); + for (i = 0; i < index->pseudo_merges.nr; i++) { + index->pseudo_merges.v[i].at = get_be64(pseudo_merge_ofs); + pseudo_merge_ofs += sizeof(uint64_t); + } + } + + index_end -= table_size; + } } index->entry_count = ntohl(header->entry_count); @@ -316,9 +368,8 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) char *midx_bitmap_filename(struct multi_pack_index *midx) { struct strbuf buf = STRBUF_INIT; - - get_midx_filename(&buf, midx->object_dir); - strbuf_addf(&buf, "-%s.bitmap", hash_to_hex(get_midx_checksum(midx))); + get_midx_filename_ext(&buf, midx->object_dir, get_midx_checksum(midx), + MIDX_EXT_BITMAP); return strbuf_detach(&buf, NULL); } @@ -338,7 +389,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, struct stat st; char *bitmap_name = midx_bitmap_filename(midx); int fd = git_open(bitmap_name); - uint32_t i; + uint32_t i, preferred_pack; struct packed_git *preferred; if (fd < 0) { @@ -375,7 +426,8 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (load_bitmap_header(bitmap_git) < 0) goto cleanup; - if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) { + if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum, + the_repository->hash_algo)) { error(_("checksum doesn't match in MIDX and bitmap")); goto cleanup; } @@ -393,7 +445,12 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, } } - preferred = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; + if (midx_preferred_pack(bitmap_git->midx, &preferred_pack) < 0) { + warning(_("could not determine MIDX preferred pack")); + goto cleanup; + } + + preferred = bitmap_git->midx->packs[preferred_pack]; if (!is_pack_valid(preferred)) { warning(_("preferred pack (%s) is invalid"), preferred->pack_name); @@ -878,7 +935,7 @@ static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git, const struct object_id *oid) { uint32_t pos; - off_t offset = find_pack_entry_one(oid->hash, bitmap_git->pack); + off_t offset = find_pack_entry_one(oid, bitmap_git->pack); if (!offset) return -1; @@ -963,6 +1020,22 @@ static void show_commit(struct commit *commit UNUSED, { } +static unsigned apply_pseudo_merges_for_commit_1(struct bitmap_index *bitmap_git, + struct bitmap *result, + struct commit *commit, + uint32_t commit_pos) +{ + int ret; + + ret = apply_pseudo_merges_for_commit(&bitmap_git->pseudo_merges, + result, commit, commit_pos); + + if (ret) + pseudo_merges_satisfied_nr += ret; + + return ret; +} + static int add_to_include_set(struct bitmap_index *bitmap_git, struct include_data *data, struct commit *commit, @@ -978,11 +1051,19 @@ static int add_to_include_set(struct bitmap_index *bitmap_git, partial = bitmap_for_commit(bitmap_git, commit); if (partial) { + existing_bitmaps_hits_nr++; + bitmap_or_ewah(data->base, partial); return 0; } + existing_bitmaps_misses_nr++; + bitmap_set(data->base, bitmap_pos); + if (apply_pseudo_merges_for_commit_1(bitmap_git, data->base, commit, + bitmap_pos)) + return 0; + return 1; } @@ -1033,8 +1114,12 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git, { struct ewah_bitmap *or_with = bitmap_for_commit(bitmap_git, commit); - if (!or_with) + if (!or_with) { + existing_bitmaps_misses_nr++; return 0; + } + + existing_bitmaps_hits_nr++; if (!*base) *base = ewah_to_bitmap(or_with); @@ -1101,12 +1186,27 @@ static void show_boundary_commit(struct commit *commit, void *_data) } } -static void show_boundary_object(struct object *object, - const char *name, void *data) +static void show_boundary_object(struct object *object UNUSED, + const char *name UNUSED, + void *data UNUSED) { BUG("should not be called"); } +static unsigned cascade_pseudo_merges_1(struct bitmap_index *bitmap_git, + struct bitmap *result, + struct bitmap *roots) +{ + int ret = cascade_pseudo_merges(&bitmap_git->pseudo_merges, + result, roots); + if (ret) { + pseudo_merges_cascades_nr++; + pseudo_merges_satisfied_nr += ret; + } + + return ret; +} + static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots) @@ -1116,6 +1216,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, unsigned int i; unsigned int tmp_blobs, tmp_trees, tmp_tags; int any_missing = 0; + int existing_bitmaps = 0; cb.bitmap_git = bitmap_git; cb.base = bitmap_new(); @@ -1123,6 +1224,25 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, revs->ignore_missing_links = 1; + if (bitmap_git->pseudo_merges.nr) { + struct bitmap *roots_bitmap = bitmap_new(); + struct object_list *objects = NULL; + + for (objects = roots; objects; objects = objects->next) { + struct object *object = objects->item; + int pos; + + pos = bitmap_position(bitmap_git, &object->oid); + if (pos < 0) + continue; + + bitmap_set(roots_bitmap, pos); + } + + if (!cascade_pseudo_merges_1(bitmap_git, cb.base, roots_bitmap)) + bitmap_free(roots_bitmap); + } + /* * OR in any existing reachability bitmaps among `roots` into * `cb.base`. @@ -1134,8 +1254,10 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, continue; if (add_commit_to_bitmap(bitmap_git, &cb.base, - (struct commit *)object)) + (struct commit *)object)) { + existing_bitmaps = 1; continue; + } any_missing = 1; } @@ -1143,6 +1265,9 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, if (!any_missing) goto cleanup; + if (existing_bitmaps) + cascade_pseudo_merges_1(bitmap_git, cb.base, NULL); + tmp_blobs = revs->blob_objects; tmp_trees = revs->tree_objects; tmp_tags = revs->tag_objects; @@ -1198,6 +1323,44 @@ cleanup: return cb.base; } +struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git, + struct commit *commit) +{ + struct commit_list *p; + struct bitmap *parents; + struct pseudo_merge *match = NULL; + + if (!bitmap_git->pseudo_merges.nr) + return NULL; + + parents = bitmap_new(); + + for (p = commit->parents; p; p = p->next) { + int pos = bitmap_position(bitmap_git, &p->item->object.oid); + if (pos < 0 || pos >= bitmap_num_objects(bitmap_git)) + goto done; + + bitmap_set(parents, pos); + } + + match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges, + parents); + +done: + bitmap_free(parents); + if (match) + return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match); + + return NULL; +} + +static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git) +{ + uint32_t i; + for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) + bitmap_git->pseudo_merges.v[i].satisfied = 0; +} + static struct bitmap *find_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots, @@ -1205,9 +1368,32 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, { struct bitmap *base = NULL; int needs_walk = 0; + unsigned existing_bitmaps = 0; struct object_list *not_mapped = NULL; + unsatisfy_all_pseudo_merges(bitmap_git); + + if (bitmap_git->pseudo_merges.nr) { + struct bitmap *roots_bitmap = bitmap_new(); + struct object_list *objects = NULL; + + for (objects = roots; objects; objects = objects->next) { + struct object *object = objects->item; + int pos; + + pos = bitmap_position(bitmap_git, &object->oid); + if (pos < 0) + continue; + + bitmap_set(roots_bitmap, pos); + } + + base = bitmap_new(); + cascade_pseudo_merges_1(bitmap_git, base, roots_bitmap); + bitmap_free(roots_bitmap); + } + /* * Go through all the roots for the walk. The ones that have bitmaps * on the bitmap index will be `or`ed together to form an initial @@ -1218,11 +1404,21 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, */ while (roots) { struct object *object = roots->item; + roots = roots->next; + if (base) { + int pos = bitmap_position(bitmap_git, &object->oid); + if (pos > 0 && bitmap_get(base, pos)) { + object->flags |= SEEN; + continue; + } + } + if (object->type == OBJ_COMMIT && add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) { object->flags |= SEEN; + existing_bitmaps = 1; continue; } @@ -1238,6 +1434,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, roots = not_mapped; + if (existing_bitmaps) + cascade_pseudo_merges_1(bitmap_git, base, NULL); + /* * Let's iterate through all the roots that don't have bitmaps to * check if we can determine them to be reachable from the existing @@ -1258,8 +1457,12 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, object->flags &= ~UNINTERESTING; add_pending_object(revs, object, ""); needs_walk = 1; + + roots_without_bitmaps_nr++; } else { object->flags |= SEEN; + + roots_with_bitmaps_nr++; } } @@ -1279,6 +1482,8 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, base = fill_in_bitmap(bitmap_git, revs, base, seen); } + object_list_free(¬_mapped); + return base; } @@ -1404,7 +1609,7 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git, if (bsearch_midx(&object->oid, bitmap_git->midx, NULL)) return 1; } else { - if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0) + if (find_pack_entry_one(&object->oid, bitmap_git->pack) > 0) return 1; } } @@ -1665,6 +1870,30 @@ static int can_filter_bitmap(struct list_objects_filter_options *filter) return !filter_bitmap(NULL, NULL, NULL, filter); } + +static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git, + struct bitmap *result) +{ + struct eindex *eindex = &bitmap_git->ext_index; + uint32_t objects_nr; + size_t i, pos; + + objects_nr = bitmap_num_objects(bitmap_git); + pos = objects_nr / BITS_IN_EWORD; + + if (pos > result->word_alloc) + pos = result->word_alloc; + + memset(result->words, 0x00, sizeof(eword_t) * pos); + for (i = pos * BITS_IN_EWORD; i < objects_nr; i++) + bitmap_unset(result, i); + + for (i = 0; i < eindex->count; ++i) { + if (has_object_pack(&eindex->objects[i]->oid)) + bitmap_unset(result, objects_nr + i); + } +} + struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects) { @@ -1787,12 +2016,28 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, wants_bitmap, &revs->filter); + if (revs->unpacked) + filter_packed_objects_from_bitmap(bitmap_git, wants_bitmap); + bitmap_git->result = wants_bitmap; bitmap_git->haves = haves_bitmap; object_list_free(&wants); object_list_free(&haves); + trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied", + pseudo_merges_satisfied_nr); + trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades", + pseudo_merges_cascades_nr); + trace2_data_intmax("bitmap", the_repository, "bitmap/hits", + existing_bitmaps_hits_nr); + trace2_data_intmax("bitmap", the_repository, "bitmap/misses", + existing_bitmaps_misses_nr); + trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap", + roots_with_bitmaps_nr); + trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap", + roots_without_bitmaps_nr); + return bitmap_git; cleanup: @@ -1806,49 +2051,30 @@ cleanup: * -1 means "stop trying further objects"; 0 means we may or may not have * reused, but you can keep feeding bits. */ -static int try_partial_reuse(struct packed_git *pack, - size_t pos, +static int try_partial_reuse(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + size_t bitmap_pos, + uint32_t pack_pos, + off_t offset, struct bitmap *reuse, struct pack_window **w_curs) { - off_t offset, delta_obj_offset; + off_t delta_obj_offset; enum object_type type; unsigned long size; - /* - * try_partial_reuse() is called either on (a) objects in the - * bitmapped pack (in the case of a single-pack bitmap) or (b) - * objects in the preferred pack of a multi-pack bitmap. - * Importantly, the latter can pretend as if only a single pack - * exists because: - * - * - The first pack->num_objects bits of a MIDX bitmap are - * reserved for the preferred pack, and - * - * - Ties due to duplicate objects are always resolved in - * favor of the preferred pack. - * - * Therefore we do not need to ever ask the MIDX for its copy of - * an object by OID, since it will always select it from the - * preferred pack. Likewise, the selected copy of the base - * object for any deltas will reside in the same pack. - * - * This means that we can reuse pos when looking up the bit in - * the reuse bitmap, too, since bits corresponding to the - * preferred pack precede all bits from other packs. - */ - - if (pos >= pack->num_objects) - return -1; /* not actually in the pack or MIDX preferred pack */ + if (pack_pos >= pack->p->num_objects) + return -1; /* not actually in the pack */ - offset = delta_obj_offset = pack_pos_to_offset(pack, pos); - type = unpack_object_header(pack, w_curs, &offset, &size); + delta_obj_offset = offset; + type = unpack_object_header(pack->p, w_curs, &offset, &size); if (type < 0) return -1; /* broken packfile, punt */ if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { off_t base_offset; uint32_t base_pos; + uint32_t base_bitmap_pos; /* * Find the position of the base object so we can look it up @@ -1858,24 +2084,48 @@ static int try_partial_reuse(struct packed_git *pack, * and the normal slow path will complain about it in * more detail. */ - base_offset = get_delta_base(pack, w_curs, &offset, type, + base_offset = get_delta_base(pack->p, w_curs, &offset, type, delta_obj_offset); if (!base_offset) return 0; - if (offset_to_pack_pos(pack, base_offset, &base_pos) < 0) - return 0; - /* - * We assume delta dependencies always point backwards. This - * lets us do a single pass, and is basically always true - * due to the way OFS_DELTAs work. You would not typically - * find REF_DELTA in a bitmapped pack, since we only bitmap - * packs we write fresh, and OFS_DELTA is the default). But - * let's double check to make sure the pack wasn't written with - * odd parameters. - */ - if (base_pos >= pos) - return 0; + offset_to_pack_pos(pack->p, base_offset, &base_pos); + + if (bitmap_is_midx(bitmap_git)) { + /* + * Cross-pack deltas are rejected for now, but could + * theoretically be supported in the future. + * + * We would need to ensure that we're sending both + * halves of the delta/base pair, regardless of whether + * or not the two cross a pack boundary. If they do, + * then we must convert the delta to an REF_DELTA to + * refer back to the base in the other pack. + * */ + if (midx_pair_to_pack_pos(bitmap_git->midx, + pack->pack_int_id, + base_offset, + &base_bitmap_pos) < 0) { + return 0; + } + } else { + if (offset_to_pack_pos(pack->p, base_offset, + &base_pos) < 0) + return 0; + /* + * We assume delta dependencies always point backwards. + * This lets us do a single pass, and is basically + * always true due to the way OFS_DELTAs work. You would + * not typically find REF_DELTA in a bitmapped pack, + * since we only bitmap packs we write fresh, and + * OFS_DELTA is the default). But let's double check to + * make sure the pack wasn't written with odd + * parameters. + */ + if (base_pos >= pack_pos) + return 0; + base_bitmap_pos = pack->bitmap_pos + base_pos; + } /* * And finally, if we're not sending the base as part of our @@ -1885,77 +2135,91 @@ static int try_partial_reuse(struct packed_git *pack, * to REF_DELTA on the fly. Better to just let the normal * object_entry code path handle it. */ - if (!bitmap_get(reuse, base_pos)) + if (!bitmap_get(reuse, base_bitmap_pos)) return 0; } /* * If we got here, then the object is OK to reuse. Mark it. */ - bitmap_set(reuse, pos); + bitmap_set(reuse, bitmap_pos); return 0; } -uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git) +static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + struct bitmap *reuse) { - struct multi_pack_index *m = bitmap_git->midx; - if (!m) - BUG("midx_preferred_pack: requires non-empty MIDX"); - return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0)); -} - -int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, - struct packed_git **packfile_out, - uint32_t *entries, - struct bitmap **reuse_out) -{ - struct repository *r = the_repository; - struct packed_git *pack; struct bitmap *result = bitmap_git->result; - struct bitmap *reuse; struct pack_window *w_curs = NULL; - size_t i = 0; - uint32_t offset; - uint32_t objects_nr; + size_t pos = pack->bitmap_pos / BITS_IN_EWORD; - assert(result); + if (!pack->bitmap_pos) { + /* + * If we're processing the first (in the case of a MIDX, the + * preferred pack) or the only (in the case of single-pack + * bitmaps) pack, then we can reuse whole words at a time. + * + * This is because we know that any deltas in this range *must* + * have their bases chosen from the same pack, since: + * + * - In the single pack case, there is no other pack to choose + * them from. + * + * - In the MIDX case, the first pack is the preferred pack, so + * all ties are broken in favor of that pack (i.e. the one + * we're currently processing). So any duplicate bases will be + * resolved in favor of the pack we're processing. + */ + while (pos < result->word_alloc && + pos < pack->bitmap_nr / BITS_IN_EWORD && + result->words[pos] == (eword_t)~0) + pos++; + memset(reuse->words, 0xFF, pos * sizeof(eword_t)); + } - load_reverse_index(r, bitmap_git); + for (; pos < result->word_alloc; pos++) { + eword_t word = result->words[pos]; + size_t offset; - if (bitmap_is_midx(bitmap_git)) - pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; - else - pack = bitmap_git->pack; - objects_nr = pack->num_objects; + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + size_t bit_pos; + uint32_t pack_pos; + off_t ofs; - while (i < result->word_alloc && result->words[i] == (eword_t)~0) - i++; + if (word >> offset == 0) + break; - /* - * Don't mark objects not in the packfile or preferred pack. This bitmap - * marks objects eligible for reuse, but the pack-reuse code only - * understands how to reuse a single pack. Since the preferred pack is - * guaranteed to have all bases for its deltas (in a multi-pack bitmap), - * we use it instead of another pack. In single-pack bitmaps, the choice - * is made for us. - */ - if (i > objects_nr / BITS_IN_EWORD) - i = objects_nr / BITS_IN_EWORD; + offset += ewah_bit_ctz64(word >> offset); - reuse = bitmap_word_alloc(i); - memset(reuse->words, 0xFF, i * sizeof(eword_t)); + bit_pos = pos * BITS_IN_EWORD + offset; + if (bit_pos < pack->bitmap_pos) + continue; + if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr) + goto done; - for (; i < result->word_alloc; ++i) { - eword_t word = result->words[i]; - size_t pos = (i * BITS_IN_EWORD); + if (bitmap_is_midx(bitmap_git)) { + uint32_t midx_pos; - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) - break; + midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); + ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); - offset += ewah_bit_ctz64(word >> offset); - if (try_partial_reuse(pack, pos + offset, - reuse, &w_curs) < 0) { + if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0) + BUG("could not find object in pack %s " + "at offset %"PRIuMAX" in MIDX", + pack_basename(pack->p), (uintmax_t)ofs); + } else { + pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos)); + if (pack_pos >= pack->p->num_objects) + BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")", + pack_basename(pack->p), (uintmax_t)pack_pos, + pack->p->num_objects); + + ofs = pack_pos_to_offset(pack->p, pack_pos); + } + + if (try_partial_reuse(bitmap_git, pack, bit_pos, + pack_pos, ofs, reuse, &w_curs) < 0) { /* * try_partial_reuse indicated we couldn't reuse * any bits, so there is no point in trying more @@ -1972,11 +2236,112 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, done: unuse_pack(&w_curs); +} - *entries = bitmap_popcount(reuse); - if (!*entries) { - bitmap_free(reuse); +static int bitmapped_pack_cmp(const void *va, const void *vb) +{ + const struct bitmapped_pack *a = va; + const struct bitmapped_pack *b = vb; + + if (a->bitmap_pos < b->bitmap_pos) return -1; + if (a->bitmap_pos > b->bitmap_pos) + return 1; + return 0; +} + +void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, + struct bitmapped_pack **packs_out, + size_t *packs_nr_out, + struct bitmap **reuse_out, + int multi_pack_reuse) +{ + struct repository *r = the_repository; + struct bitmapped_pack *packs = NULL; + struct bitmap *result = bitmap_git->result; + struct bitmap *reuse; + size_t i; + size_t packs_nr = 0, packs_alloc = 0; + size_t word_alloc; + uint32_t objects_nr = 0; + + assert(result); + + load_reverse_index(r, bitmap_git); + + if (!bitmap_is_midx(bitmap_git) || !bitmap_git->midx->chunk_bitmapped_packs) + multi_pack_reuse = 0; + + if (multi_pack_reuse) { + for (i = 0; i < bitmap_git->midx->num_packs; i++) { + struct bitmapped_pack pack; + if (nth_bitmapped_pack(r, bitmap_git->midx, &pack, i) < 0) { + warning(_("unable to load pack: '%s', disabling pack-reuse"), + bitmap_git->midx->pack_names[i]); + free(packs); + return; + } + + if (!pack.bitmap_nr) + continue; + + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + memcpy(&packs[packs_nr++], &pack, sizeof(pack)); + + objects_nr += pack.p->num_objects; + } + + QSORT(packs, packs_nr, bitmapped_pack_cmp); + } else { + struct packed_git *pack; + uint32_t pack_int_id; + + if (bitmap_is_midx(bitmap_git)) { + uint32_t preferred_pack_pos; + + if (midx_preferred_pack(bitmap_git->midx, &preferred_pack_pos) < 0) { + warning(_("unable to compute preferred pack, disabling pack-reuse")); + return; + } + + pack = bitmap_git->midx->packs[preferred_pack_pos]; + pack_int_id = preferred_pack_pos; + } else { + pack = bitmap_git->pack; + /* + * Any value for 'pack_int_id' will do here. When we + * process the pack via try_partial_reuse(), we won't + * use the `pack_int_id` field since we have a non-MIDX + * bitmap. + * + * Use '-1' as a sentinel value to make it clear + * that we do not expect to read this field. + */ + pack_int_id = -1; + } + + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + packs[packs_nr].p = pack; + packs[packs_nr].pack_int_id = pack_int_id; + packs[packs_nr].bitmap_nr = pack->num_objects; + packs[packs_nr].bitmap_pos = 0; + packs[packs_nr].from_midx = bitmap_git->midx; + + objects_nr = packs[packs_nr++].bitmap_nr; + } + + word_alloc = objects_nr / BITS_IN_EWORD; + if (objects_nr % BITS_IN_EWORD) + word_alloc++; + reuse = bitmap_word_alloc(word_alloc); + + for (i = 0; i < packs_nr; i++) + reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse); + + if (bitmap_is_empty(reuse)) { + free(packs); + bitmap_free(reuse); + return; } /* @@ -1984,9 +2349,9 @@ done: * need to be handled separately. */ bitmap_and_not(result, reuse); - *packfile_out = pack; + *packs_out = packs; + *packs_nr_out = packs_nr; *reuse_out = reuse; - return 0; } int bitmap_walk_contains(struct bitmap_index *bitmap_git, @@ -2267,6 +2632,132 @@ cleanup: return 0; } +static void bit_pos_to_object_id(struct bitmap_index *bitmap_git, + uint32_t bit_pos, + struct object_id *oid) +{ + uint32_t index_pos; + + if (bitmap_is_midx(bitmap_git)) + index_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); + else + index_pos = pack_pos_to_index(bitmap_git->pack, bit_pos); + + nth_bitmap_object_oid(bitmap_git, oid, index_pos); +} + +int test_bitmap_pseudo_merges(struct repository *r) +{ + struct bitmap_index *bitmap_git; + uint32_t i; + + bitmap_git = prepare_bitmap_git(r); + if (!bitmap_git || !bitmap_git->pseudo_merges.nr) + goto cleanup; + + for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) { + struct pseudo_merge *merge; + struct ewah_bitmap *commits_bitmap, *merge_bitmap; + + merge = use_pseudo_merge(&bitmap_git->pseudo_merges, + &bitmap_git->pseudo_merges.v[i]); + commits_bitmap = merge->commits; + merge_bitmap = pseudo_merge_bitmap(&bitmap_git->pseudo_merges, + merge); + + printf("at=%"PRIuMAX", commits=%"PRIuMAX", objects=%"PRIuMAX"\n", + (uintmax_t)merge->at, + (uintmax_t)ewah_bitmap_popcount(commits_bitmap), + (uintmax_t)ewah_bitmap_popcount(merge_bitmap)); + } + +cleanup: + free_bitmap_index(bitmap_git); + return 0; +} + +static void dump_ewah_object_ids(struct bitmap_index *bitmap_git, + struct ewah_bitmap *bitmap) + +{ + struct ewah_iterator it; + eword_t word; + uint32_t pos = 0; + + ewah_iterator_init(&it, bitmap); + + while (ewah_iterator_next(&word, &it)) { + struct object_id oid; + uint32_t offset; + + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + if (!(word >> offset)) + break; + + offset += ewah_bit_ctz64(word >> offset); + + bit_pos_to_object_id(bitmap_git, pos + offset, &oid); + printf("%s\n", oid_to_hex(&oid)); + } + pos += BITS_IN_EWORD; + } +} + +int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n) +{ + struct bitmap_index *bitmap_git; + struct pseudo_merge *merge; + int ret = 0; + + bitmap_git = prepare_bitmap_git(r); + if (!bitmap_git || !bitmap_git->pseudo_merges.nr) + goto cleanup; + + if (n >= bitmap_git->pseudo_merges.nr) { + ret = error(_("pseudo-merge index out of range " + "(%"PRIu32" >= %"PRIuMAX")"), + n, (uintmax_t)bitmap_git->pseudo_merges.nr); + goto cleanup; + } + + merge = use_pseudo_merge(&bitmap_git->pseudo_merges, + &bitmap_git->pseudo_merges.v[n]); + dump_ewah_object_ids(bitmap_git, merge->commits); + +cleanup: + free_bitmap_index(bitmap_git); + return ret; +} + +int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n) +{ + struct bitmap_index *bitmap_git; + struct pseudo_merge *merge; + int ret = 0; + + bitmap_git = prepare_bitmap_git(r); + if (!bitmap_git || !bitmap_git->pseudo_merges.nr) + goto cleanup; + + if (n >= bitmap_git->pseudo_merges.nr) { + ret = error(_("pseudo-merge index out of range " + "(%"PRIu32" >= %"PRIuMAX")"), + n, (uintmax_t)bitmap_git->pseudo_merges.nr); + goto cleanup; + } + + merge = use_pseudo_merge(&bitmap_git->pseudo_merges, + &bitmap_git->pseudo_merges.v[n]); + + dump_ewah_object_ids(bitmap_git, + pseudo_merge_bitmap(&bitmap_git->pseudo_merges, + merge)); + +cleanup: + free_bitmap_index(bitmap_git); + return ret; +} + int rebuild_bitmap(const uint32_t *reposition, struct ewah_bitmap *source, struct bitmap *dest) @@ -2373,6 +2864,7 @@ void free_bitmap_index(struct bitmap_index *b) */ close_midx_revindex(b->midx); } + free_pseudo_merge_map(&b->pseudo_merges); free(b); } |