Diffstat (limited to 'builtin/pack-objects.c')
 builtin/pack-objects.c | 1002 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 794 insertions(+), 208 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1c3b842651..b5454e5df1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -32,7 +32,7 @@ #include "list.h" #include "packfile.h" #include "object-file.h" -#include "object-store-ll.h" +#include "odb.h" #include "replace-object.h" #include "dir.h" #include "midx.h" @@ -41,6 +41,10 @@ #include "promisor-remote.h" #include "pack-mtimes.h" #include "parse-options.h" +#include "blob.h" +#include "tree.h" +#include "path-walk.h" +#include "trace2.h" /* * Objects we are going to pack are collected in the `to_pack` structure. @@ -183,9 +187,15 @@ static inline void oe_set_delta_size(struct packing_data *pack, #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val) #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val) -static const char *pack_usage[] = { - N_("git pack-objects --stdout [<options>] [< <ref-list> | < <object-list>]"), - N_("git pack-objects [<options>] <base-name> [< <ref-list> | < <object-list>]"), +static const char *const pack_usage[] = { + N_("git pack-objects [-q | --progress | --all-progress] [--all-progress-implied]\n" + " [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n" + " [--local] [--incremental] [--window=<n>] [--depth=<n>]\n" + " [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n" + " [--cruft] [--cruft-expiration=<time>]\n" + " [--stdout [--filter=<filter-spec>] | <base-name>]\n" + " [--shallow] [--keep-true-parents] [--[no-]sparse]\n" + " [--name-hash-version=<n>] [--path-walk] < <object-list>"), NULL }; @@ -200,12 +210,14 @@ static int keep_unreachable, unpack_unreachable, include_tag; static timestamp_t unpack_unreachable_expiration; static int pack_loose_unreachable; static int cruft; +static int shallow = 0; static timestamp_t cruft_expiration; static int local; static int have_non_local_packs; static int incremental; static int ignore_packed_keep_on_disk; static int ignore_packed_keep_in_core; +static int ignore_packed_keep_in_core_has_cruft; static int allow_ofs_delta; static struct pack_idx_option pack_idx_opts; static const char *base_name; @@ -217,6 +229,7 @@ static int delta_search_threads; static int pack_to_stdout; static int sparse; static int thin; +static int path_walk = -1; static int num_preferred_base; static struct progress *progress_state; @@ -269,6 +282,49 @@ struct configured_exclusion { static struct oidmap configured_exclusions; static struct oidset excluded_by_config; +static int name_hash_version = -1; + +enum stdin_packs_mode { + STDIN_PACKS_MODE_NONE, + STDIN_PACKS_MODE_STANDARD, + STDIN_PACKS_MODE_FOLLOW, +}; + +/** + * Check whether the name_hash_version chosen by user input is appropriate, + * and also validate whether it is compatible with other features. 
+ */ +static void validate_name_hash_version(void) +{ + if (name_hash_version < 1 || name_hash_version > 2) + die(_("invalid --name-hash-version option: %d"), name_hash_version); + if (write_bitmap_index && name_hash_version != 1) { + warning(_("currently, --write-bitmap-index requires --name-hash-version=1")); + name_hash_version = 1; + } +} + +static inline uint32_t pack_name_hash_fn(const char *name) +{ + static int seen_version = -1; + + if (seen_version < 0) + seen_version = name_hash_version; + else if (seen_version != name_hash_version) + BUG("name hash version changed from %d to %d mid-process", + seen_version, name_hash_version); + + switch (name_hash_version) { + case 1: + return pack_name_hash(name); + + case 2: + return pack_name_hash_v2((const unsigned char *)name); + + default: + BUG("invalid name-hash version: %d", name_hash_version); + } +} /* * stats @@ -299,13 +355,13 @@ static void *get_delta(struct object_entry *entry) void *buf, *base_buf, *delta_buf; enum object_type type; - buf = repo_read_object_file(the_repository, &entry->idx.oid, &type, - &size); + buf = odb_read_object(the_repository->objects, &entry->idx.oid, + &type, &size); if (!buf) die(_("unable to read %s"), oid_to_hex(&entry->idx.oid)); - base_buf = repo_read_object_file(the_repository, - &DELTA(entry)->idx.oid, &type, - &base_size); + base_buf = odb_read_object(the_repository->objects, + &DELTA(entry)->idx.oid, &type, + &base_size); if (!base_buf) die("unable to read %s", oid_to_hex(&DELTA(entry)->idx.oid)); @@ -462,14 +518,15 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent if (!usable_delta) { if (oe_type(entry) == OBJ_BLOB && - oe_size_greater_than(&to_pack, entry, big_file_threshold) && + oe_size_greater_than(&to_pack, entry, + repo_settings_get_big_file_threshold(the_repository)) && (st = open_istream(the_repository, &entry->idx.oid, &type, &size, NULL)) != NULL) buf = NULL; else { - buf = repo_read_object_file(the_repository, - &entry->idx.oid, &type, - &size); + buf = odb_read_object(the_repository->objects, + &entry->idx.oid, &type, + &size); if (!buf) die(_("unable to read %s"), oid_to_hex(&entry->idx.oid)); @@ -1264,7 +1321,8 @@ static void write_pack_file(void) struct object_entry **write_order; if (progress > pack_to_stdout) - progress_state = start_progress(_("Writing objects"), nr_result); + progress_state = start_progress(the_repository, + _("Writing objects"), nr_result); ALLOC_ARRAY(written_list, to_pack.nr_objects); write_order = compute_write_order(); @@ -1273,9 +1331,10 @@ static void write_pack_file(void) char *pack_tmp_name = NULL; if (pack_to_stdout) - f = hashfd_throughput(1, "<stdout>", progress_state); + f = hashfd_throughput(the_repository->hash_algo, 1, + "<stdout>", progress_state); else - f = create_tmp_packfile(&pack_tmp_name); + f = create_tmp_packfile(the_repository, &pack_tmp_name); offset = write_pack_header(f, nr_remaining); @@ -1318,8 +1377,9 @@ static void write_pack_file(void) */ int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0); - fixup_pack_header_footer(fd, hash, pack_tmp_name, - nr_written, hash, offset); + fixup_pack_header_footer(the_hash_algo, fd, hash, + pack_tmp_name, nr_written, + hash, offset); close(fd); if (write_bitmap_index) { if (write_bitmap_index != WRITE_BITMAP_QUIET) @@ -1358,7 +1418,8 @@ static void write_pack_file(void) if (write_bitmap_index) { bitmap_writer_init(&bitmap_writer, - the_repository, &to_pack); + the_repository, &to_pack, + NULL); bitmap_writer_set_checksum(&bitmap_writer, hash); 
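The new pack_name_hash_fn() above dispatches between two name-hash versions. For orientation, here is a minimal sketch of the long-standing version-1 hash (a paraphrase of the helper in pack-objects.h, not a verbatim copy); version 2, pack_name_hash_v2(), additionally mixes in leading path components, and its exact mixing is deliberately not reproduced here:

#include <ctype.h>
#include <stdint.h>

/*
 * Sketch of name-hash version 1 (paraphrased): only the last
 * sixteen non-whitespace characters influence the result, and the
 * newest character counts most, so paths ending in the same suffix
 * (".c", "Makefile", ...) sort near each other during delta search.
 */
static uint32_t name_hash_v1_sketch(const char *name)
{
	uint32_t c, hash = 0;

	if (!name)
		return 0;

	while ((c = (unsigned char)*name++) != 0) {
		if (isspace(c))
			continue;
		/* shift older characters down, weight the newest most */
		hash = (hash >> 2) + (c << 24);
	}
	return hash;
}

That grouping behavior is what --name-hash-version lets the user tune, and why validate_name_hash_version() pins version 1 when bitmaps are written: the on-disk bitmap format currently assumes it.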
bitmap_writer_build_type_index(&bitmap_writer, written_list); @@ -1367,9 +1428,10 @@ static void write_pack_file(void) if (cruft) pack_idx_opts.flags |= WRITE_MTIMES; - stage_tmp_packfiles(&tmpname, pack_tmp_name, - written_list, nr_written, - &to_pack, &pack_idx_opts, hash, + stage_tmp_packfiles(the_repository, &tmpname, + pack_tmp_name, written_list, + nr_written, &to_pack, + &pack_idx_opts, hash, &idx_tmp_name); if (write_bitmap_index) { @@ -1393,7 +1455,7 @@ static void write_pack_file(void) strbuf_setlen(&tmpname, tmpname_len); } - rename_tmp_packfile_idx(&tmpname, &idx_tmp_name); + rename_tmp_packfile_idx(the_repository, &tmpname, &idx_tmp_name); free(idx_tmp_name); strbuf_release(&tmpname); @@ -1462,8 +1524,60 @@ static int have_duplicate_entry(const struct object_id *oid, return 1; } +static int want_cruft_object_mtime(struct repository *r, + const struct object_id *oid, + unsigned flags, uint32_t mtime) +{ + struct packed_git **cache; + + for (cache = kept_pack_cache(r, flags); *cache; cache++) { + struct packed_git *p = *cache; + off_t ofs; + uint32_t candidate_mtime; + + ofs = find_pack_entry_one(oid, p); + if (!ofs) + continue; + + /* + * We have a copy of the object 'oid' in a non-cruft + * pack. We can avoid packing an additional copy + * regardless of what the existing copy's mtime is since + * it is outside of a cruft pack. + */ + if (!p->is_cruft) + return 0; + + /* + * If we have a copy of the object 'oid' in a cruft + * pack, then either read the cruft pack's mtime for + * that object, or, if that can't be loaded, assume the + * pack's mtime itself. + */ + if (!load_pack_mtimes(p)) { + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) + continue; + candidate_mtime = nth_packed_mtime(p, pos); + } else { + candidate_mtime = p->mtime; + } + + /* + * We have a surviving copy of the object in a cruft + * pack whose mtime is greater than or equal to the one + * we are considering. We can thus avoid packing an + * additional copy of that object. + */ + if (mtime <= candidate_mtime) + return 0; + } + + return -1; +} + static int want_found_object(const struct object_id *oid, int exclude, - struct packed_git *p) + struct packed_git *p, uint32_t mtime) { if (exclude) return 1; @@ -1513,12 +1627,29 @@ static int want_found_object(const struct object_id *oid, int exclude, if (ignore_packed_keep_in_core) flags |= IN_CORE_KEEP_PACKS; - if (ignore_packed_keep_on_disk && p->pack_keep) - return 0; - if (ignore_packed_keep_in_core && p->pack_keep_in_core) - return 0; - if (has_object_kept_pack(p->repo, oid, flags)) - return 0; + /* + * If the object is in a pack that we want to ignore, *and* we + * don't have any cruft packs that are being retained, we can + * abort quickly. + */ + if (!ignore_packed_keep_in_core_has_cruft) { + if (ignore_packed_keep_on_disk && p->pack_keep) + return 0; + if (ignore_packed_keep_in_core && p->pack_keep_in_core) + return 0; + if (has_object_kept_pack(p->repo, oid, flags)) + return 0; + } else { + /* + * But if there is at least one cruft pack which + * is being kept, we only want to include the + * provided object if it has a strictly greater + * mtime than any existing cruft copy. 
+ */ + if (!want_cruft_object_mtime(p->repo, oid, flags, + mtime)) + return 0; + } } /* @@ -1537,7 +1668,8 @@ static int want_object_in_pack_one(struct packed_git *p, const struct object_id *oid, int exclude, struct packed_git **found_pack, - off_t *found_offset) + off_t *found_offset, + uint32_t found_mtime) { off_t offset; @@ -1553,7 +1685,7 @@ static int want_object_in_pack_one(struct packed_git *p, *found_offset = offset; *found_pack = p; } - return want_found_object(oid, exclude, p); + return want_found_object(oid, exclude, p, found_mtime); } return -1; } @@ -1567,17 +1699,26 @@ static int want_object_in_pack_one(struct packed_git *p, * function finds if there is any pack that has the object and returns the pack * and its offset in these variables. */ -static int want_object_in_pack(const struct object_id *oid, - int exclude, - struct packed_git **found_pack, - off_t *found_offset) +static int want_object_in_pack_mtime(const struct object_id *oid, + int exclude, + struct packed_git **found_pack, + off_t *found_offset, + uint32_t found_mtime) { int want; + struct odb_source *source; struct list_head *pos; - struct multi_pack_index *m; - if (!exclude && local && has_loose_object_nonlocal(oid)) - return 0; + if (!exclude && local) { + /* + * Note that we start iterating at `sources->next` so that we + * skip the local object source. + */ + struct odb_source *source = the_repository->objects->sources->next; + for (; source; source = source->next) + if (has_loose_object(source, oid)) + return 0; + } /* * If we already know the pack object lives in, start checks from that @@ -1585,7 +1726,8 @@ static int want_object_in_pack(const struct object_id *oid, * are present we will determine the answer right now. */ if (*found_pack) { - want = want_found_object(oid, exclude, *found_pack); + want = want_found_object(oid, exclude, *found_pack, + found_mtime); if (want != -1) return want; @@ -1593,21 +1735,25 @@ static int want_object_in_pack(const struct object_id *oid, *found_offset = 0; } - for (m = get_multi_pack_index(the_repository); m; m = m->next) { + odb_prepare_alternates(the_repository->objects); + + for (source = the_repository->objects->sources; source; source = source->next) { + struct multi_pack_index *m = get_multi_pack_index(source); struct pack_entry e; - if (fill_midx_entry(the_repository, oid, &e, m)) { - want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset); + + if (m && fill_midx_entry(m, oid, &e)) { + want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime); if (want != -1) return want; } } - list_for_each(pos, get_packed_git_mru(the_repository)) { + list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { struct packed_git *p = list_entry(pos, struct packed_git, mru); - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset); + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) list_move(&p->mru, - get_packed_git_mru(the_repository)); + packfile_store_get_packs_mru(the_repository->objects->packfiles)); if (want != -1) return want; } @@ -1634,6 +1780,15 @@ static int want_object_in_pack(const struct object_id *oid, return 1; } +static inline int want_object_in_pack(const struct object_id *oid, + int exclude, + struct packed_git **found_pack, + off_t *found_offset) +{ + return want_object_in_pack_mtime(oid, exclude, found_pack, found_offset, + 0); +} + static struct object_entry *create_object_entry(const struct 
object_id *oid, enum object_type type, uint32_t hash, @@ -1686,7 +1841,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, return 0; } - create_object_entry(oid, type, pack_name_hash(name), + create_object_entry(oid, type, pack_name_hash_fn(name), exclude, name && no_try_delta(name), found_pack, found_offset); return 1; @@ -1695,7 +1850,8 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, static int add_object_entry_from_bitmap(const struct object_id *oid, enum object_type type, int flags UNUSED, uint32_t name_hash, - struct packed_git *pack, off_t offset) + struct packed_git *pack, off_t offset, + void *payload UNUSED) { display_progress(progress_state, ++nr_seen); @@ -1769,7 +1925,7 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid) /* Did not find one. Either we got a bogus request or * we need to read and perhaps cache. */ - data = repo_read_object_file(the_repository, oid, &type, &size); + data = odb_read_object(the_repository->objects, oid, &type, &size); if (!data) return NULL; if (type != OBJ_TREE) { @@ -1900,7 +2056,7 @@ static void add_preferred_base_object(const char *name) { struct pbase_tree *it; size_t cmplen; - unsigned hash = pack_name_hash(name); + unsigned hash = pack_name_hash_fn(name); if (!num_preferred_base || check_pbase_path(hash)) return; @@ -1929,8 +2085,8 @@ static void add_preferred_base(struct object_id *oid) if (window <= num_preferred_base++) return; - data = read_object_with_reference(the_repository, oid, - OBJ_TREE, &size, &tree_oid); + data = odb_read_object_peeled(the_repository->objects, oid, + OBJ_TREE, &size, &tree_oid); if (!data) return; @@ -2028,10 +2184,10 @@ static void prefetch_to_pack(uint32_t object_index_start) { for (i = object_index_start; i < to_pack.nr_objects; i++) { struct object_entry *entry = to_pack.objects + i; - if (!oid_object_info_extended(the_repository, - &entry->idx.oid, - NULL, - OBJECT_INFO_FOR_PREFETCH)) + if (!odb_read_object_info_extended(the_repository->objects, + &entry->idx.oid, + NULL, + OBJECT_INFO_FOR_PREFETCH)) continue; oid_array_append(&to_fetch, &entry->idx.oid); } @@ -2172,19 +2328,19 @@ static void check_object(struct object_entry *entry, uint32_t object_index) /* * No choice but to fall back to the recursive delta walk - * with oid_object_info() to find about the object type + * with odb_read_object_info() to find about the object type * at this point... */ give_up: unuse_pack(&w_curs); } - if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi, - OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) { + if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi, + OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) { if (repo_has_promisor_remote(the_repository)) { prefetch_to_pack(object_index); - if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi, - OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) + if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi, + OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) type = -1; } else { type = -1; @@ -2258,12 +2414,13 @@ static void drop_reused_delta(struct object_entry *entry) if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { /* * We failed to get the info from this pack for some reason; - * fall back to oid_object_info, which may find another copy. 
+ * fall back to odb_read_object_info, which may find another copy. * And if that fails, the error will be recorded in oe_type(entry) * and dealt with in prepare_pack(). */ oe_set_type(entry, - oid_object_info(the_repository, &entry->idx.oid, &size)); + odb_read_object_info(the_repository->objects, + &entry->idx.oid, &size)); } else { oe_set_type(entry, type); } @@ -2400,7 +2557,8 @@ static void get_object_details(void) struct object_entry **sorted_by_offset; if (progress) - progress_state = start_progress(_("Counting objects"), + progress_state = start_progress(the_repository, + _("Counting objects"), to_pack.nr_objects); CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects); @@ -2412,7 +2570,8 @@ static void get_object_details(void) struct object_entry *entry = sorted_by_offset[i]; check_object(entry, i); if (entry->type_valid && - oe_size_greater_than(&to_pack, entry, big_file_threshold)) + oe_size_greater_than(&to_pack, entry, + repo_settings_get_big_file_threshold(the_repository))) entry->no_try_delta = 1; display_progress(progress_state, i + 1); } @@ -2549,7 +2708,8 @@ unsigned long oe_get_size_slow(struct packing_data *pack, if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) { packing_data_lock(&to_pack); - if (oid_object_info(the_repository, &e->idx.oid, &size) < 0) + if (odb_read_object_info(the_repository->objects, + &e->idx.oid, &size) < 0) die(_("unable to get size of %s"), oid_to_hex(&e->idx.oid)); packing_data_unlock(&to_pack); @@ -2632,9 +2792,9 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, /* Load data if not already done */ if (!trg->data) { packing_data_lock(&to_pack); - trg->data = repo_read_object_file(the_repository, - &trg_entry->idx.oid, &type, - &sz); + trg->data = odb_read_object(the_repository->objects, + &trg_entry->idx.oid, &type, + &sz); packing_data_unlock(&to_pack); if (!trg->data) die(_("object %s cannot be read"), @@ -2647,9 +2807,9 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, } if (!src->data) { packing_data_lock(&to_pack); - src->data = repo_read_object_file(the_repository, - &src_entry->idx.oid, &type, - &sz); + src->data = odb_read_object(the_repository->objects, + &src_entry->idx.oid, &type, + &sz); packing_data_unlock(&to_pack); if (!src->data) { if (src_entry->preferred_base) { @@ -2913,6 +3073,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, struct thread_params { pthread_t thread; struct object_entry **list; + struct packing_region *regions; unsigned list_size; unsigned remaining; int window; @@ -3155,6 +3316,242 @@ static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, cons return 0; } +static int should_attempt_deltas(struct object_entry *entry) +{ + if (DELTA(entry)) + /* This happens if we decided to reuse existing + * delta from a pack. "reuse_delta &&" is implied. + */ + return 0; + + if (!entry->type_valid || + oe_size_less_than(&to_pack, entry, 50)) + return 0; + + if (entry->no_try_delta) + return 0; + + if (!entry->preferred_base) { + if (oe_type(entry) < 0) + die(_("unable to get type of object %s"), + oid_to_hex(&entry->idx.oid)); + } else if (oe_type(entry) < 0) { + /* + * This object is not found, but we + * don't have to include it anyway. 
+ */ + return 0; + } + + return 1; +} + +static void find_deltas_for_region(struct object_entry *list, + struct packing_region *region, + unsigned int *processed) +{ + struct object_entry **delta_list; + unsigned int delta_list_nr = 0; + + ALLOC_ARRAY(delta_list, region->nr); + for (size_t i = 0; i < region->nr; i++) { + struct object_entry *entry = list + region->start + i; + if (should_attempt_deltas(entry)) + delta_list[delta_list_nr++] = entry; + } + + QSORT(delta_list, delta_list_nr, type_size_sort); + find_deltas(delta_list, &delta_list_nr, window, depth, processed); + free(delta_list); +} + +static void find_deltas_by_region(struct object_entry *list, + struct packing_region *regions, + size_t start, size_t nr) +{ + unsigned int processed = 0; + size_t progress_nr; + + if (!nr) + return; + + progress_nr = regions[nr - 1].start + regions[nr - 1].nr; + + if (progress) + progress_state = start_progress(the_repository, + _("Compressing objects by path"), + progress_nr); + + while (nr--) + find_deltas_for_region(list, + ®ions[start++], + &processed); + + display_progress(progress_state, progress_nr); + stop_progress(&progress_state); +} + +static void *threaded_find_deltas_by_path(void *arg) +{ + struct thread_params *me = arg; + + progress_lock(); + while (me->remaining) { + while (me->remaining) { + progress_unlock(); + find_deltas_for_region(to_pack.objects, + me->regions, + me->processed); + progress_lock(); + me->remaining--; + me->regions++; + } + + me->working = 0; + pthread_cond_signal(&progress_cond); + progress_unlock(); + + /* + * We must not set ->data_ready before we wait on the + * condition because the main thread may have set it to 1 + * before we get here. In order to be sure that new + * work is available if we see 1 in ->data_ready, it + * was initialized to 0 before this thread was spawned + * and we reset it to 0 right away. + */ + pthread_mutex_lock(&me->mutex); + while (!me->data_ready) + pthread_cond_wait(&me->cond, &me->mutex); + me->data_ready = 0; + pthread_mutex_unlock(&me->mutex); + + progress_lock(); + } + progress_unlock(); + /* leave ->working 1 so that this doesn't get more work assigned */ + return NULL; +} + +static void ll_find_deltas_by_region(struct object_entry *list, + struct packing_region *regions, + uint32_t start, uint32_t nr) +{ + struct thread_params *p; + int i, ret, active_threads = 0; + unsigned int processed = 0; + uint32_t progress_nr; + init_threaded_search(); + + if (!nr) + return; + + progress_nr = regions[nr - 1].start + regions[nr - 1].nr; + if (delta_search_threads <= 1) { + find_deltas_by_region(list, regions, start, nr); + cleanup_threaded_search(); + return; + } + + if (progress > pack_to_stdout) + fprintf_ln(stderr, + Q_("Path-based delta compression using up to %d thread", + "Path-based delta compression using up to %d threads", + delta_search_threads), + delta_search_threads); + CALLOC_ARRAY(p, delta_search_threads); + + if (progress) + progress_state = start_progress(the_repository, + _("Compressing objects by path"), + progress_nr); + /* Partition the work amongst work threads. */ + for (i = 0; i < delta_search_threads; i++) { + unsigned sub_size = nr / (delta_search_threads - i); + + p[i].window = window; + p[i].depth = depth; + p[i].processed = &processed; + p[i].working = 1; + p[i].data_ready = 0; + + p[i].regions = regions; + p[i].list_size = sub_size; + p[i].remaining = sub_size; + + regions += sub_size; + nr -= sub_size; + } + + /* Start work threads. 
*/ + for (i = 0; i < delta_search_threads; i++) { + if (!p[i].list_size) + continue; + pthread_mutex_init(&p[i].mutex, NULL); + pthread_cond_init(&p[i].cond, NULL); + ret = pthread_create(&p[i].thread, NULL, + threaded_find_deltas_by_path, &p[i]); + if (ret) + die(_("unable to create thread: %s"), strerror(ret)); + active_threads++; + } + + /* + * Now let's wait for work completion. Each time a thread is done + * with its work, we steal half of the remaining work from the + * thread with the largest number of unprocessed objects and give + * it to that newly idle thread. This ensure good load balancing + * until the remaining object list segments are simply too short + * to be worth splitting anymore. + */ + while (active_threads) { + struct thread_params *target = NULL; + struct thread_params *victim = NULL; + unsigned sub_size = 0; + + progress_lock(); + for (;;) { + for (i = 0; !target && i < delta_search_threads; i++) + if (!p[i].working) + target = &p[i]; + if (target) + break; + pthread_cond_wait(&progress_cond, &progress_mutex); + } + + for (i = 0; i < delta_search_threads; i++) + if (p[i].remaining > 2*window && + (!victim || victim->remaining < p[i].remaining)) + victim = &p[i]; + if (victim) { + sub_size = victim->remaining / 2; + target->regions = victim->regions + victim->remaining - sub_size; + victim->list_size -= sub_size; + victim->remaining -= sub_size; + } + target->list_size = sub_size; + target->remaining = sub_size; + target->working = 1; + progress_unlock(); + + pthread_mutex_lock(&target->mutex); + target->data_ready = 1; + pthread_cond_signal(&target->cond); + pthread_mutex_unlock(&target->mutex); + + if (!sub_size) { + pthread_join(target->thread, NULL); + pthread_cond_destroy(&target->cond); + pthread_mutex_destroy(&target->mutex); + active_threads--; + } + } + cleanup_threaded_search(); + free(p); + + display_progress(progress_state, progress_nr); + stop_progress(&progress_state); +} + static void prepare_pack(int window, int depth) { struct object_entry **delta_list; @@ -3179,39 +3576,21 @@ static void prepare_pack(int window, int depth) if (!to_pack.nr_objects || !window || !depth) return; + if (path_walk) + ll_find_deltas_by_region(to_pack.objects, to_pack.regions, + 0, to_pack.nr_regions); + ALLOC_ARRAY(delta_list, to_pack.nr_objects); nr_deltas = n = 0; for (i = 0; i < to_pack.nr_objects; i++) { struct object_entry *entry = to_pack.objects + i; - if (DELTA(entry)) - /* This happens if we decided to reuse existing - * delta from a pack. "reuse_delta &&" is implied. - */ - continue; - - if (!entry->type_valid || - oe_size_less_than(&to_pack, entry, 50)) - continue; - - if (entry->no_try_delta) + if (!should_attempt_deltas(entry)) continue; - if (!entry->preferred_base) { + if (!entry->preferred_base) nr_deltas++; - if (oe_type(entry) < 0) - die(_("unable to get type of object %s"), - oid_to_hex(&entry->idx.oid)); - } else { - if (oe_type(entry) < 0) { - /* - * This object is not found, but we - * don't have to include it anyway. 
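The worker loop in threaded_find_deltas_by_path() above relies on a classic condition-variable handshake: ->data_ready is only ever read or written under the mutex, and the worker re-checks it in a loop so that a signal sent before the worker reaches pthread_cond_wait() is not lost. A self-contained sketch of that discipline, with hypothetical names:

#include <pthread.h>

struct worker {
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	int data_ready;
};

static void worker_wait_for_work(struct worker *w)
{
	pthread_mutex_lock(&w->mutex);
	while (!w->data_ready)		/* also guards against spurious wakeups */
		pthread_cond_wait(&w->cond, &w->mutex);
	w->data_ready = 0;		/* consume the signal under the lock */
	pthread_mutex_unlock(&w->mutex);
}

static void boss_hand_out_work(struct worker *w)
{
	pthread_mutex_lock(&w->mutex);
	w->data_ready = 1;		/* publish before signaling */
	pthread_cond_signal(&w->cond);
	pthread_mutex_unlock(&w->mutex);
}

If the flag were set outside the mutex, the boss could set it to 1 between the worker's check and its wait, and the wakeup would be missed; the pattern above makes that window impossible.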
- */ - continue; - } - } delta_list[n++] = entry; } @@ -3220,7 +3599,8 @@ static void prepare_pack(int window, int depth) unsigned nr_done = 0; if (progress) - progress_state = start_progress(_("Compressing objects"), + progress_state = start_progress(the_repository, + _("Compressing objects"), nr_deltas); QSORT(delta_list, n, type_size_sort); ll_find_deltas(delta_list, n, window+1, depth, &nr_done); @@ -3365,7 +3745,6 @@ static int add_object_entry_from_pack(const struct object_id *oid, return 0; if (p) { - struct rev_info *revs = _data; struct object_info oi = OBJECT_INFO_INIT; oi.typep = &type; @@ -3373,6 +3752,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, die(_("could not get type of object %s in pack %s"), oid_to_hex(oid), p->pack_name); } else if (type == OBJ_COMMIT) { + struct rev_info *revs = _data; /* * commits in included packs are used as starting points for the * subsequent revision walk @@ -3388,32 +3768,48 @@ static int add_object_entry_from_pack(const struct object_id *oid, return 0; } -static void show_commit_pack_hint(struct commit *commit UNUSED, - void *data UNUSED) +static void show_object_pack_hint(struct object *object, const char *name, + void *data) { - /* nothing to do; commits don't have a namehash */ + enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data; + if (mode == STDIN_PACKS_MODE_FOLLOW) { + if (object->type == OBJ_BLOB && + !odb_has_object(the_repository->objects, &object->oid, 0)) + return; + add_object_entry(&object->oid, object->type, name, 0); + } else { + struct object_entry *oe = packlist_find(&to_pack, &object->oid); + if (!oe) + return; + + /* + * Our 'to_pack' list was constructed by iterating all + * objects packed in included packs, and so doesn't have + * a non-zero hash field that you would typically pick + * up during a reachability traversal. + * + * Make a best-effort attempt to fill in the ->hash and + * ->no_try_delta fields here in order to perhaps + * improve the delta selection process. + */ + oe->hash = pack_name_hash_fn(name); + oe->no_try_delta = name && no_try_delta(name); + + stdin_packs_hints_nr++; + } } -static void show_object_pack_hint(struct object *object, const char *name, - void *data UNUSED) +static void show_commit_pack_hint(struct commit *commit, void *data) { - struct object_entry *oe = packlist_find(&to_pack, &object->oid); - if (!oe) + enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data; + + if (mode == STDIN_PACKS_MODE_FOLLOW) { + show_object_pack_hint((struct object *)commit, "", data); return; + } - /* - * Our 'to_pack' list was constructed by iterating all objects packed in - * included packs, and so doesn't have a non-zero hash field that you - * would typically pick up during a reachability traversal. - * - * Make a best-effort attempt to fill in the ->hash and ->no_try_delta - * here using a now in order to perhaps improve the delta selection - * process. 
- */ - oe->hash = pack_name_hash(name); - oe->no_try_delta = name && no_try_delta(name); + /* nothing to do; commits don't have a namehash */ - stdin_packs_hints_nr++; } static int pack_mtime_cmp(const void *_a, const void *_b) @@ -3433,32 +3829,13 @@ static int pack_mtime_cmp(const void *_a, const void *_b) return 0; } -static void read_packs_list_from_stdin(void) +static void read_packs_list_from_stdin(struct rev_info *revs) { struct strbuf buf = STRBUF_INIT; struct string_list include_packs = STRING_LIST_INIT_DUP; struct string_list exclude_packs = STRING_LIST_INIT_DUP; struct string_list_item *item = NULL; - struct packed_git *p; - struct rev_info revs; - - repo_init_revisions(the_repository, &revs, NULL); - /* - * Use a revision walk to fill in the namehash of objects in the include - * packs. To save time, we'll avoid traversing through objects that are - * in excluded packs. - * - * That may cause us to avoid populating all of the namehash fields of - * all included objects, but our goal is best-effort, since this is only - * an optimization during delta selection. - */ - revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs.blob_objects = 1; - revs.tree_objects = 1; - revs.tag_objects = 1; - revs.ignore_missing_links = 1; while (strbuf_getline(&buf, stdin) != EOF) { if (!buf.len) @@ -3477,7 +3854,7 @@ static void read_packs_list_from_stdin(void) string_list_sort(&exclude_packs); string_list_remove_duplicates(&exclude_packs, 0); - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); if ((item = string_list_lookup(&include_packs, pack_name))) @@ -3528,25 +3905,55 @@ static void read_packs_list_from_stdin(void) struct packed_git *p = item->util; for_each_object_in_pack(p, add_object_entry_from_pack, - &revs, + revs, FOR_EACH_OBJECT_PACK_ORDER); } + strbuf_release(&buf); + string_list_clear(&include_packs, 0); + string_list_clear(&exclude_packs, 0); +} + +static void add_unreachable_loose_objects(struct rev_info *revs); + +static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) +{ + struct rev_info revs; + + repo_init_revisions(the_repository, &revs, NULL); + /* + * Use a revision walk to fill in the namehash of objects in the include + * packs. To save time, we'll avoid traversing through objects that are + * in excluded packs. + * + * That may cause us to avoid populating all of the namehash fields of + * all included objects, but our goal is best-effort, since this is only + * an optimization during delta selection. 
+ */ + revs.no_kept_objects = 1; + revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.blob_objects = 1; + revs.tree_objects = 1; + revs.tag_objects = 1; + revs.ignore_missing_links = 1; + + /* avoids adding objects in excluded packs */ + ignore_packed_keep_in_core = 1; + read_packs_list_from_stdin(&revs); + if (rev_list_unpacked) + add_unreachable_loose_objects(&revs); + if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); traverse_commit_list(&revs, show_commit_pack_hint, show_object_pack_hint, - NULL); + &mode); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", stdin_packs_found_nr); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", stdin_packs_hints_nr); - - strbuf_release(&buf); - string_list_clear(&include_packs, 0); - string_list_clear(&exclude_packs, 0); } static void add_cruft_object_entry(const struct object_id *oid, enum object_type type, @@ -3560,13 +3967,20 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type entry = packlist_find(&to_pack, oid); if (entry) { if (name) { - entry->hash = pack_name_hash(name); + entry->hash = pack_name_hash_fn(name); entry->no_try_delta = no_try_delta(name); } } else { - if (!want_object_in_pack(oid, 0, &pack, &offset)) + if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime)) return; - if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) { + if (!pack && type == OBJ_BLOB) { + struct odb_source *source = the_repository->objects->sources; + int found = 0; + + for (; !found && source; source = source->next) + if (has_loose_object(source, oid)) + found = 1; + /* * If a traversed tree has a missing blob then we want * to avoid adding that missing object to our pack. @@ -3580,10 +3994,11 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type * limited to "ensure non-tip blobs which don't exist in * packs do exist via loose objects". Confused? 
*/ - return; + if (!found) + return; } - entry = create_object_entry(oid, type, pack_name_hash(name), + entry = create_object_entry(oid, type, pack_name_hash_fn(name), 0, name && no_try_delta(name), pack, offset); } @@ -3638,20 +4053,22 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep) struct packed_git *p = item->util; if (!p) die(_("could not find pack '%s'"), item->string); + if (p->is_cruft && keep) + ignore_packed_keep_in_core_has_cruft = 1; p->pack_keep_in_core = keep; } } -static void add_unreachable_loose_objects(void); static void add_objects_in_unpacked_packs(void); static void enumerate_cruft_objects(void) { if (progress) - progress_state = start_progress(_("Enumerating cruft objects"), 0); + progress_state = start_progress(the_repository, + _("Enumerating cruft objects"), 0); add_objects_in_unpacked_packs(); - add_unreachable_loose_objects(); + add_unreachable_loose_objects(NULL); stop_progress(&progress_state); } @@ -3674,7 +4091,8 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs revs.ignore_missing_links = 1; if (progress) - progress_state = start_progress(_("Enumerating cruft objects"), 0); + progress_state = start_progress(the_repository, + _("Enumerating cruft objects"), 0); ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration, set_cruft_mtime, 1); stop_progress(&progress_state); @@ -3686,14 +4104,15 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs * Re-mark only the fresh packs as kept so that objects in * unknown packs do not halt the reachability traversal early. */ - for (p = get_all_packs(the_repository); p; p = p->next) + repo_for_each_pack(the_repository, p) p->pack_keep_in_core = 0; mark_pack_kept_in_core(fresh_packs, 1); if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); if (progress) - progress_state = start_progress(_("Traversing cruft objects"), 0); + progress_state = start_progress(the_repository, + _("Traversing cruft objects"), 0); nr_seen = 0; traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL); @@ -3722,7 +4141,7 @@ static void read_cruft_objects(void) string_list_sort(&discard_packs); string_list_sort(&fresh_packs); - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *pack_name = pack_basename(p); struct string_list_item *item; @@ -3799,7 +4218,7 @@ static void show_commit(struct commit *commit, void *data UNUSED) index_commit_for_bitmap(commit); if (use_delta_islands) - propagate_island_marks(commit); + propagate_island_marks(the_repository, commit); } static void show_object(struct object *obj, const char *name, @@ -3832,7 +4251,7 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void * Quietly ignore ALL missing objects. This avoids problems with * staging them now and getting an odd error later. */ - if (!has_object(the_repository, &obj->oid, 0)) + if (!odb_has_object(the_repository->objects, &obj->oid, 0)) return; show_object(obj, name, data); @@ -3846,7 +4265,7 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name, * Quietly ignore EXPECTED missing objects. This avoids problems with * staging them now and getting an odd error later. 
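The cruft-pack handling above (want_cruft_object_mtime() together with the want_object_in_pack_mtime() call in add_cruft_object_entry()) boils down to one rule: a copy in any kept non-cruft pack makes an extra copy unwanted, and a copy in a kept cruft pack makes it unwanted unless the incoming mtime is strictly newer. Restated as a standalone predicate, purely for illustration and not the actual implementation:

#include <stdint.h>

static int keep_candidate_copy(int in_kept_non_cruft_pack,
			       int in_kept_cruft_pack,
			       uint32_t incoming_mtime,
			       uint32_t newest_cruft_mtime)
{
	if (in_kept_non_cruft_pack)
		return 0;	/* already safe outside any cruft pack */
	if (in_kept_cruft_pack && incoming_mtime <= newest_cruft_mtime)
		return 0;	/* an equally fresh cruft copy survives */
	return 1;		/* strictly newer: worth packing again */
}

The strict inequality matters: packing a copy with an equal or older mtime would only duplicate data that the surviving cruft pack already preserves.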
*/ - if (!has_object(the_repository, &obj->oid, 0) && + if (!odb_has_object(the_repository->objects, &obj->oid, 0) && is_promisor_object(to_pack.repo, &obj->oid)) return; @@ -3927,9 +4346,10 @@ static void add_objects_in_unpacked_packs(void) } static int add_loose_object(const struct object_id *oid, const char *path, - void *data UNUSED) + void *data) { - enum object_type type = oid_object_info(the_repository, oid, NULL); + struct rev_info *revs = data; + enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL); if (type < 0) { warning(_("loose object at %s could not be examined"), path); @@ -3949,6 +4369,10 @@ static int add_loose_object(const struct object_id *oid, const char *path, } else { add_object_entry(oid, type, "", 0); } + + if (revs && type == OBJ_COMMIT) + add_pending_oid(revs, NULL, oid, 0); + return 0; } @@ -3957,20 +4381,20 @@ static int add_loose_object(const struct object_id *oid, const char *path, * add_object_entry will weed out duplicates, so we just add every * loose object we find. */ -static void add_unreachable_loose_objects(void) +static void add_unreachable_loose_objects(struct rev_info *revs) { - for_each_loose_file_in_objdir(repo_get_object_directory(the_repository), - add_loose_object, - NULL, NULL, NULL); + for_each_loose_file_in_source(the_repository->objects->sources, + add_loose_object, NULL, NULL, revs); } static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { + struct packfile_store *packs = the_repository->objects->packfiles; static struct packed_git *last_found = (void *)1; struct packed_git *p; p = (last_found != (void *)1) ? last_found : - get_all_packs(the_repository); + packfile_store_get_packs(packs); while (p) { if ((!p->pack_local || p->pack_keep || @@ -3980,7 +4404,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) return 1; } if (p == last_found) - p = get_all_packs(the_repository); + p = packfile_store_get_packs(packs); else p = p->next; if (p == last_found) @@ -4017,7 +4441,7 @@ static void loosen_unused_packed_objects(void) uint32_t loosened_objects_nr = 0; struct object_id oid; - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -4029,7 +4453,8 @@ static void loosen_unused_packed_objects(void) if (!packlist_find(&to_pack, &oid) && !has_sha1_pack_kept_or_nonlocal(&oid) && !loosened_object_can_be_discarded(&oid, p->mtime)) { - if (force_object_loose(&oid, p->mtime)) + if (force_object_loose(the_repository->objects->sources, + &oid, p->mtime)) die(_("unable to force loose object")); loosened_objects_nr++; } @@ -4060,6 +4485,15 @@ static int get_object_list_from_bitmap(struct rev_info *revs) if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; + /* + * For now, force the name-hash version to be 1 since that + * is the version implied by the bitmap format. Later, the + * format can include this version explicitly in its format, + * allowing readers to know the version that was used during + * the bitmap write. 
+ */ + name_hash_version = 1; + if (pack_options_allow_reuse()) reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfiles, @@ -4106,7 +4540,7 @@ static int mark_bitmap_preferred_tip(const char *refname, if (!peel_iterated_oid(the_repository, oid, &peeled)) oid = &peeled; - object = parse_object_or_die(oid, refname); + object = parse_object_or_die(the_repository, oid, refname); if (object->type == OBJ_COMMIT) object->flags |= NEEDS_BITMAP; @@ -4129,7 +4563,94 @@ static void mark_bitmap_preferred_tips(void) } } -static void get_object_list(struct rev_info *revs, int ac, const char **av) +static inline int is_oid_uninteresting(struct repository *repo, + struct object_id *oid) +{ + struct object *o = lookup_object(repo, oid); + return !o || (o->flags & UNINTERESTING); +} + +static int add_objects_by_path(const char *path, + struct oid_array *oids, + enum object_type type, + void *data) +{ + size_t oe_start = to_pack.nr_objects; + size_t oe_end; + unsigned int *processed = data; + + /* + * First, add all objects to the packing data, including the ones + * marked UNINTERESTING (translated to 'exclude') as they can be + * used as delta bases. + */ + for (size_t i = 0; i < oids->nr; i++) { + int exclude; + struct object_info oi = OBJECT_INFO_INIT; + struct object_id *oid = &oids->oid[i]; + + /* Skip objects that do not exist locally. */ + if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) && + odb_read_object_info_extended(the_repository->objects, oid, &oi, + OBJECT_INFO_FOR_PREFETCH) < 0) + continue; + + exclude = is_oid_uninteresting(the_repository, oid); + + if (exclude && !thin) + continue; + + add_object_entry(oid, type, path, exclude); + } + + oe_end = to_pack.nr_objects; + + /* We can skip delta calculations if it is a no-op. */ + if (oe_end == oe_start || !window) + return 0; + + ALLOC_GROW(to_pack.regions, + to_pack.nr_regions + 1, + to_pack.nr_regions_alloc); + + to_pack.regions[to_pack.nr_regions].start = oe_start; + to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start; + to_pack.nr_regions++; + + *processed += oids->nr; + display_progress(progress_state, *processed); + + return 0; +} + +static void get_object_list_path_walk(struct rev_info *revs) +{ + struct path_walk_info info = PATH_WALK_INFO_INIT; + unsigned int processed = 0; + int result; + + info.revs = revs; + info.path_fn = add_objects_by_path; + info.path_fn_data = &processed; + + /* + * Allow the --[no-]sparse option to be interesting here, if only + * for testing purposes. Paths with no interesting objects will not + * contribute to the resulting pack, but only create noisy preferred + * base objects. 
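add_objects_by_path() above is the path-walk callback: each invocation receives every object discovered at one path and appends them to to_pack as a single contiguous region, so the path-based delta phase can later restrict its search window to likely versions of the same file. A toy model of that bookkeeping (hypothetical names; only the region arithmetic mirrors the diff):

#include <stddef.h>
#include <stdio.h>

struct region { size_t start, nr; };

static struct region regions[16];
static size_t nr_regions, nr_objects;

/* one call per path, as walk_objects_by_path() would make */
static void on_path(const char *path, size_t oids_nr)
{
	size_t start = nr_objects;

	nr_objects += oids_nr;	/* stands in for add_object_entry() calls */
	if (oids_nr) {		/* skip empty regions, as the diff does */
		regions[nr_regions].start = start;
		regions[nr_regions].nr = oids_nr;
		nr_regions++;
	}
	(void)path;
}

int main(void)
{
	on_path("Makefile", 3);
	on_path("builtin/pack-objects.c", 5);
	for (size_t i = 0; i < nr_regions; i++)
		printf("region %zu: objects [%zu, %zu)\n", i,
		       regions[i].start,
		       regions[i].start + regions[i].nr);
	return 0;	/* prints [0, 3) and [3, 8) */
}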
+ */ + info.prune_all_uninteresting = sparse; + info.edge_aggressive = shallow; + + trace2_region_enter("pack-objects", "path-walk", revs->repo); + result = walk_objects_by_path(&info); + trace2_region_leave("pack-objects", "path-walk", revs->repo); + + if (result) + die(_("failed to pack objects via path-walk")); +} + +static void get_object_list(struct rev_info *revs, struct strvec *argv) { struct setup_revision_opt s_r_opt = { .allow_exclude_promisor_objects = 1, @@ -4139,7 +4660,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) int save_warning; save_commit_buffer = 0; - setup_revisions(ac, av, revs, &s_r_opt); + setup_revisions_from_strvec(argv, revs, &s_r_opt); /* make sure shallows are read */ is_repository_shallow(the_repository); @@ -4184,15 +4705,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) if (write_bitmap_index) mark_bitmap_preferred_tips(); - if (prepare_revision_walk(revs)) - die(_("revision walk setup failed")); - mark_edges_uninteresting(revs, show_edge, sparse); - if (!fn_show_object) fn_show_object = show_object; - traverse_commit_list(revs, - show_commit, fn_show_object, - NULL); + + if (path_walk) { + get_object_list_path_walk(revs); + } else { + if (prepare_revision_walk(revs)) + die(_("revision walk setup failed")); + mark_edges_uninteresting(revs, show_edge, sparse); + traverse_commit_list(revs, + show_commit, fn_show_object, + NULL); + } if (unpack_unreachable_expiration) { revs->ignore_missing_links = 1; @@ -4208,7 +4733,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) if (keep_unreachable) add_objects_in_unpacked_packs(); if (pack_loose_unreachable) - add_unreachable_loose_objects(); + add_unreachable_loose_objects(NULL); if (unpack_unreachable) loosen_unused_packed_objects(); @@ -4222,7 +4747,7 @@ static void add_extra_kept_packs(const struct string_list *names) if (!names->nr) return; - for (p = get_all_packs(the_repository); p; p = p->next) { + repo_for_each_pack(the_repository, p) { const char *name = basename(p->pack_name); int i; @@ -4306,7 +4831,7 @@ static int option_parse_cruft_expiration(const struct option *opt UNUSED, static int is_not_in_promisor_pack_obj(struct object *obj, void *data UNUSED) { struct object_info info = OBJECT_INFO_INIT; - if (oid_object_info_extended(the_repository, &obj->oid, &info, 0)) + if (odb_read_object_info_extended(the_repository->objects, &obj->oid, &info, 0)) BUG("should_include_obj should only be called on existing objects"); return info.whence != OI_PACKED || !info.u.packed.pack->pack_promisor; } @@ -4315,18 +4840,34 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) { return is_not_in_promisor_pack_obj((struct object *) commit, data); } +static int parse_stdin_packs_mode(const struct option *opt, const char *arg, + int unset) +{ + enum stdin_packs_mode *mode = opt->value; + + if (unset) + *mode = STDIN_PACKS_MODE_NONE; + else if (!arg || !*arg) + *mode = STDIN_PACKS_MODE_STANDARD; + else if (!strcmp(arg, "follow")) + *mode = STDIN_PACKS_MODE_FOLLOW; + else + die(_("invalid value for '%s': '%s'"), opt->long_name, arg); + + return 0; +} + int cmd_pack_objects(int argc, const char **argv, const char *prefix, struct repository *repo UNUSED) { int use_internal_rev_list = 0; - int shallow = 0; int all_progress_implied = 0; struct strvec rp = STRVEC_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; int rev_list_index = 0; - int stdin_packs = 0; + enum stdin_packs_mode stdin_packs = 
STDIN_PACKS_MODE_NONE; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; @@ -4345,16 +4886,16 @@ int cmd_pack_objects(int argc, OPT_CALLBACK_F(0, "index-version", &pack_idx_opts, N_("<version>[,<offset>]"), N_("write the pack index file in the specified idx format version"), PARSE_OPT_NONEG, option_parse_index_version), - OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit, - N_("maximum size of each output pack file")), + OPT_UNSIGNED(0, "max-pack-size", &pack_size_limit, + N_("maximum size of each output pack file")), OPT_BOOL(0, "local", &local, N_("ignore borrowed objects from alternate object store")), OPT_BOOL(0, "incremental", &incremental, N_("ignore packed objects")), OPT_INTEGER(0, "window", &window, N_("limit pack window by objects")), - OPT_MAGNITUDE(0, "window-memory", &window_memory_limit, - N_("limit pack window by memory in addition to object limit")), + OPT_UNSIGNED(0, "window-memory", &window_memory_limit, + N_("limit pack window by memory in addition to object limit")), OPT_INTEGER(0, "depth", &depth, N_("maximum length of delta chain allowed in the resulting pack")), OPT_BOOL(0, "reuse-delta", &reuse_delta, @@ -4381,6 +4922,9 @@ int cmd_pack_objects(int argc, OPT_SET_INT_F(0, "indexed-objects", &rev_list_index, N_("include objects referred to by the index"), 1, PARSE_OPT_NONEG), + OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"), + N_("read packs from stdin"), + PARSE_OPT_OPTARG, parse_stdin_packs_mode), OPT_BOOL(0, "stdin-packs", &stdin_packs, N_("read packs from stdin")), OPT_BOOL(0, "stdout", &pack_to_stdout, @@ -4402,6 +4946,8 @@ int cmd_pack_objects(int argc, N_("use the sparse reachability algorithm")), OPT_BOOL(0, "thin", &thin, N_("create thin packs")), + OPT_BOOL(0, "path-walk", &path_walk, + N_("use the path-walk API to walk objects when possible")), OPT_BOOL(0, "shallow", &shallow, N_("create packs suitable for shallow fetches")), OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk, @@ -4435,6 +4981,8 @@ int cmd_pack_objects(int argc, OPT_STRING_LIST(0, "uri-protocol", &uri_protocols, N_("protocol"), N_("exclude any configured uploadpack.blobpackfileuri with this protocol")), + OPT_INTEGER(0, "name-hash-version", &name_hash_version, + N_("use the specified name-hash function to group similar objects")), OPT_END(), }; @@ -4454,7 +5002,7 @@ int cmd_pack_objects(int argc, reset_pack_idx_option(&pack_idx_opts); pack_idx_opts.flags |= WRITE_REV; - git_config(git_pack_config, NULL); + repo_config(the_repository, git_pack_config, NULL); if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0)) pack_idx_opts.flags &= ~WRITE_REV; @@ -4469,6 +5017,17 @@ int cmd_pack_objects(int argc, if (pack_to_stdout != !base_name || argc) usage_with_options(pack_usage, pack_objects_options); + if (path_walk < 0) { + if (use_bitmap_index > 0 || + !use_internal_rev_list) + path_walk = 0; + else if (the_repository->gitdir && + the_repository->settings.pack_use_path_walk) + path_walk = 1; + else + path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0); + } + if (depth < 0) depth = 0; if (depth >= (1 << OE_DEPTH_BITS)) { @@ -4485,7 +5044,28 @@ int cmd_pack_objects(int argc, window = 0; strvec_push(&rp, "pack-objects"); - if (thin) { + + if (path_walk) { + const char *option = NULL; + if (filter_options.choice) + option = "--filter"; + else if (use_delta_islands) + option = "--delta-islands"; + + if (option) { + warning(_("cannot use %s with %s"), + option, "--path-walk"); + path_walk = 0; + } + 
} + if (path_walk) { + strvec_push(&rp, "--boundary"); + /* + * We must disable the bitmaps because we are removing + * the --objects / --objects-edge[-aggressive] options. + */ + use_bitmap_index = 0; + } else if (thin) { use_internal_rev_list = 1; strvec_push(&rp, shallow ? "--objects-edge-aggressive" @@ -4510,9 +5090,10 @@ int cmd_pack_objects(int argc, strvec_push(&rp, "--unpacked"); } - if (exclude_promisor_objects && exclude_promisor_objects_best_effort) - die(_("options '%s' and '%s' cannot be used together"), - "--exclude-promisor-objects", "--exclude-promisor-objects-best-effort"); + die_for_incompatible_opt2(exclude_promisor_objects, + "--exclude-promisor-objects", + exclude_promisor_objects_best_effort, + "--exclude-promisor-objects-best-effort"); if (exclude_promisor_objects) { use_internal_rev_list = 1; fetch_if_missing = 0; @@ -4550,13 +5131,14 @@ int cmd_pack_objects(int argc, if (!pack_to_stdout && thin) die(_("--thin cannot be used to build an indexable pack")); - if (keep_unreachable && unpack_unreachable) - die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable"); + die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable", + unpack_unreachable, "--unpack-unreachable"); if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; - if (stdin_packs && filter_options.choice) - die(_("cannot use --filter with --stdin-packs")); + die_for_incompatible_opt2(stdin_packs, "--stdin-packs", + filter_options.choice, "--filter"); + if (stdin_packs && use_internal_rev_list) die(_("cannot use internal rev list with --stdin-packs")); @@ -4564,8 +5146,8 @@ int cmd_pack_objects(int argc, if (cruft) { if (use_internal_rev_list) die(_("cannot use internal rev list with --cruft")); - if (stdin_packs) - die(_("cannot use --stdin-packs with --cruft")); + die_for_incompatible_opt2(stdin_packs, "--stdin-packs", + cruft, "--cruft"); } /* @@ -4590,6 +5172,11 @@ int cmd_pack_objects(int argc, if (pack_to_stdout || !rev_list_all) write_bitmap_index = 0; + if (name_hash_version < 0) + name_hash_version = (int)git_env_ulong("GIT_TEST_NAME_HASH_VERSION", 1); + + validate_name_hash_version(); + if (use_delta_islands) strvec_push(&rp, "--topo-order"); @@ -4599,7 +5186,8 @@ int cmd_pack_objects(int argc, add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) + + repo_for_each_pack(the_repository, p) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -4612,7 +5200,8 @@ int cmd_pack_objects(int argc, * it also covers non-local objects */ struct packed_git *p; - for (p = get_all_packs(the_repository); p; p = p->next) { + + repo_for_each_pack(the_repository, p) { if (!p->pack_local) { have_non_local_packs = 1; break; @@ -4625,13 +5214,10 @@ int cmd_pack_objects(int argc, prepare_packing_data(the_repository, &to_pack); if (progress && !cruft) - progress_state = start_progress(_("Enumerating objects"), 0); + progress_state = start_progress(the_repository, + _("Enumerating objects"), 0); if (stdin_packs) { - /* avoids adding objects in excluded packs */ - ignore_packed_keep_in_core = 1; - read_packs_list_from_stdin(); - if (rev_list_unpacked) - add_unreachable_loose_objects(); + read_stdin_packs(stdin_packs, rev_list_unpacked); } else if (cruft) { read_cruft_objects(); } else if (!use_internal_rev_list) { @@ -4645,7 +5231,7 @@ int cmd_pack_objects(int argc, revs.include_check = 
is_not_in_promisor_pack; revs.include_check_obj = is_not_in_promisor_pack_obj; } - get_object_list(&revs, rp.nr, rp.v); + get_object_list(&revs, &rp); release_revisions(&revs); } cleanup_preferred_base(); |
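One arithmetic detail from ll_find_deltas_by_region() is worth spelling out: the initial partition hands thread i a share of nr / (delta_search_threads - i) regions, recomputing as nr shrinks, so any remainder is spread across the later threads rather than dumped on the last one. A standalone illustration:

#include <stdio.h>

int main(void)
{
	unsigned nr = 10, threads = 4;

	for (unsigned i = 0; i < threads; i++) {
		unsigned sub_size = nr / (threads - i);

		printf("thread %u: %u regions\n", i, sub_size);
		nr -= sub_size;
	}
	return 0;	/* 10 regions over 4 threads: 2, 2, 3, 3 */
}

This is the same scheme the existing ll_find_deltas() uses for its object list, and the work-stealing loop that follows rebalances any threads that finish early, so the initial split only needs to be roughly fair.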
