Diffstat (limited to 'builtin/pack-objects.c')
-rw-r--r--	builtin/pack-objects.c	814
1 file changed, 622 insertions, 192 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 8b33edc2ff..7937106ec5 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -32,7 +32,7 @@
 #include "list.h"
 #include "packfile.h"
 #include "object-file.h"
-#include "object-store.h"
+#include "odb.h"
 #include "replace-object.h"
 #include "dir.h"
 #include "midx.h"
@@ -41,6 +41,10 @@
 #include "promisor-remote.h"
 #include "pack-mtimes.h"
 #include "parse-options.h"
+#include "blob.h"
+#include "tree.h"
+#include "path-walk.h"
+#include "trace2.h"
 
 /*
  * Objects we are going to pack are collected in the `to_pack` structure.
@@ -184,8 +188,14 @@ static inline void oe_set_delta_size(struct packing_data *pack,
 #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
 
 static const char *const pack_usage[] = {
-	N_("git pack-objects --stdout [<options>] [< <ref-list> | < <object-list>]"),
-	N_("git pack-objects [<options>] <base-name> [< <ref-list> | < <object-list>]"),
+	N_("git pack-objects [-q | --progress | --all-progress] [--all-progress-implied]\n"
+	   "                 [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n"
+	   "                 [--local] [--incremental] [--window=<n>] [--depth=<n>]\n"
+	   "                 [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n"
+	   "                 [--cruft] [--cruft-expiration=<time>]\n"
+	   "                 [--stdout [--filter=<filter-spec>] | <base-name>]\n"
+	   "                 [--shallow] [--keep-true-parents] [--[no-]sparse]\n"
+	   "                 [--name-hash-version=<n>] [--path-walk] < <object-list>"),
 	NULL
 };
 
@@ -200,6 +210,7 @@ static int keep_unreachable, unpack_unreachable, include_tag;
 static timestamp_t unpack_unreachable_expiration;
 static int pack_loose_unreachable;
 static int cruft;
+static int shallow = 0;
 static timestamp_t cruft_expiration;
 static int local;
 static int have_non_local_packs;
@@ -218,6 +229,7 @@ static int delta_search_threads;
 static int pack_to_stdout;
 static int sparse;
 static int thin;
+static int path_walk = -1;
 static int num_preferred_base;
 static struct progress *progress_state;
 
@@ -272,6 +284,12 @@ static struct oidmap configured_exclusions;
 static struct oidset excluded_by_config;
 static int name_hash_version = -1;
 
+enum stdin_packs_mode {
+	STDIN_PACKS_MODE_NONE,
+	STDIN_PACKS_MODE_STANDARD,
+	STDIN_PACKS_MODE_FOLLOW,
+};
+
 /**
  * Check whether the name_hash_version chosen by user input is appropriate,
  * and also validate whether it is compatible with other features.
@@ -337,13 +355,13 @@ static void *get_delta(struct object_entry *entry)
 	void *buf, *base_buf, *delta_buf;
 	enum object_type type;
 
-	buf = repo_read_object_file(the_repository, &entry->idx.oid, &type,
-				    &size);
+	buf = odb_read_object(the_repository->objects, &entry->idx.oid,
+			      &type, &size);
 	if (!buf)
 		die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
-	base_buf = repo_read_object_file(the_repository,
-					 &DELTA(entry)->idx.oid, &type,
-					 &base_size);
+	base_buf = odb_read_object(the_repository->objects,
+				   &DELTA(entry)->idx.oid, &type,
+				   &base_size);
 	if (!base_buf)
 		die("unable to read %s",
 		    oid_to_hex(&DELTA(entry)->idx.oid));
@@ -506,9 +524,9 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 					      &size, NULL)) != NULL)
 		buf = NULL;
 	else {
-		buf = repo_read_object_file(the_repository,
-					    &entry->idx.oid, &type,
-					    &size);
+		buf = odb_read_object(the_repository->objects,
+				      &entry->idx.oid, &type,
+				      &size);
 		if (!buf)
 			die(_("unable to read %s"),
 			    oid_to_hex(&entry->idx.oid));
@@ -813,15 +831,14 @@ static enum write_one_status write_one(struct hashfile *f,
 	return WRITE_ONE_WRITTEN;
 }
 
-static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid,
-		       int flag UNUSED, void *cb_data UNUSED)
+static int mark_tagged(const struct reference *ref, void *cb_data UNUSED)
 {
 	struct object_id peeled;
-	struct object_entry *entry = packlist_find(&to_pack, oid);
+	struct object_entry *entry = packlist_find(&to_pack, ref->oid);
 
 	if (entry)
 		entry->tagged = 1;
-	if (!peel_iterated_oid(the_repository, oid, &peeled)) {
+	if (!reference_get_peeled_oid(the_repository, ref, &peeled)) {
 		entry = packlist_find(&to_pack, &peeled);
 		if (entry)
 			entry->tagged = 1;
@@ -1437,7 +1454,7 @@ static void write_pack_file(void)
 			strbuf_setlen(&tmpname, tmpname_len);
 		}
 
-		rename_tmp_packfile_idx(&tmpname, &idx_tmp_name);
+		rename_tmp_packfile_idx(the_repository, &tmpname, &idx_tmp_name);
 
 		free(idx_tmp_name);
 		strbuf_release(&tmpname);
@@ -1688,11 +1705,19 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
 				     uint32_t found_mtime)
 {
 	int want;
-	struct list_head *pos;
-	struct multi_pack_index *m;
+	struct packfile_list_entry *e;
+	struct odb_source *source;
 
-	if (!exclude && local && has_loose_object_nonlocal(oid))
-		return 0;
+	if (!exclude && local) {
+		/*
+		 * Note that we start iterating at `sources->next` so that we
+		 * skip the local object source.
+		 */
+		struct odb_source *source = the_repository->objects->sources->next;
+		for (; source; source = source->next)
+			if (odb_source_loose_has_object(source, oid))
+				return 0;
+	}
 
 	/*
 	 * If we already know the pack object lives in, start checks from that
@@ -1709,21 +1734,24 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
 			*found_offset = 0;
 	}
 
-	for (m = get_multi_pack_index(the_repository); m; m = m->next) {
+	odb_prepare_alternates(the_repository->objects);
+
+	for (source = the_repository->objects->sources; source; source = source->next) {
+		struct multi_pack_index *m = get_multi_pack_index(source);
 		struct pack_entry e;
-		if (fill_midx_entry(the_repository, oid, &e, m)) {
+
+		if (m && fill_midx_entry(m, oid, &e)) {
 			want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime);
 			if (want != -1)
 				return want;
 		}
 	}
 
-	list_for_each(pos, get_packed_git_mru(the_repository)) {
-		struct packed_git *p = list_entry(pos, struct packed_git, mru);
+	for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) {
+		struct packed_git *p = e->pack;
 		want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
 		if (!exclude && want > 0)
-			list_move(&p->mru,
-				  get_packed_git_mru(the_repository));
+			packfile_list_prepend(&the_repository->objects->packfiles->packs, p);
 		if (want != -1)
 			return want;
 	}
@@ -1895,7 +1923,7 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
 	/* Did not find one.  Either we got a bogus request or
 	 * we need to read and perhaps cache.
 	 */
-	data = repo_read_object_file(the_repository, oid, &type, &size);
+	data = odb_read_object(the_repository->objects, oid, &type, &size);
 	if (!data)
 		return NULL;
 	if (type != OBJ_TREE) {
@@ -2055,8 +2083,8 @@ static void add_preferred_base(struct object_id *oid)
 	if (window <= num_preferred_base++)
 		return;
 
-	data = read_object_with_reference(the_repository, oid,
-					  OBJ_TREE, &size, &tree_oid);
+	data = odb_read_object_peeled(the_repository->objects, oid,
+				      OBJ_TREE, &size, &tree_oid);
 	if (!data)
 		return;
 
@@ -2154,10 +2182,10 @@ static void prefetch_to_pack(uint32_t object_index_start) {
 	for (i = object_index_start; i < to_pack.nr_objects; i++) {
 		struct object_entry *entry = to_pack.objects + i;
 
-		if (!oid_object_info_extended(the_repository,
-					      &entry->idx.oid,
-					      NULL,
-					      OBJECT_INFO_FOR_PREFETCH))
+		if (!odb_read_object_info_extended(the_repository->objects,
+						   &entry->idx.oid,
+						   NULL,
+						   OBJECT_INFO_FOR_PREFETCH))
 			continue;
 		oid_array_append(&to_fetch, &entry->idx.oid);
 	}
@@ -2298,19 +2326,19 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
 
 		/*
 		 * No choice but to fall back to the recursive delta walk
-		 * with oid_object_info() to find about the object type
+		 * with odb_read_object_info() to find about the object type
 		 * at this point...
 		 */
 give_up:
 		unuse_pack(&w_curs);
 	}
 
-	if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
-				     OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
+	if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi,
+					  OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
 		if (repo_has_promisor_remote(the_repository)) {
 			prefetch_to_pack(object_index);
-			if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
-						     OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
+			if (odb_read_object_info_extended(the_repository->objects, &entry->idx.oid, &oi,
+							  OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
 				type = -1;
 		} else {
 			type = -1;
@@ -2384,12 +2412,13 @@ static void drop_reused_delta(struct object_entry *entry)
 	if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
 		/*
 		 * We failed to get the info from this pack for some reason;
-		 * fall back to oid_object_info, which may find another copy.
+		 * fall back to odb_read_object_info, which may find another copy.
 		 * And if that fails, the error will be recorded in oe_type(entry)
 		 * and dealt with in prepare_pack().
 		 */
 		oe_set_type(entry,
-			    oid_object_info(the_repository, &entry->idx.oid, &size));
+			    odb_read_object_info(the_repository->objects,
+						 &entry->idx.oid, &size));
 	} else {
 		oe_set_type(entry, type);
 	}
@@ -2677,7 +2706,8 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
 
 	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
 		packing_data_lock(&to_pack);
-		if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
+		if (odb_read_object_info(the_repository->objects,
+					 &e->idx.oid, &size) < 0)
 			die(_("unable to get size of %s"),
 			    oid_to_hex(&e->idx.oid));
 		packing_data_unlock(&to_pack);
@@ -2760,9 +2790,9 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	/* Load data if not already done */
 	if (!trg->data) {
 		packing_data_lock(&to_pack);
-		trg->data = repo_read_object_file(the_repository,
-						  &trg_entry->idx.oid, &type,
-						  &sz);
+		trg->data = odb_read_object(the_repository->objects,
+					    &trg_entry->idx.oid, &type,
+					    &sz);
 		packing_data_unlock(&to_pack);
 		if (!trg->data)
 			die(_("object %s cannot be read"),
@@ -2775,9 +2805,9 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	}
 	if (!src->data) {
 		packing_data_lock(&to_pack);
-		src->data = repo_read_object_file(the_repository,
-						  &src_entry->idx.oid, &type,
-						  &sz);
+		src->data = odb_read_object(the_repository->objects,
+					    &src_entry->idx.oid, &type,
+					    &sz);
 		packing_data_unlock(&to_pack);
 		if (!src->data) {
 			if (src_entry->preferred_base) {
@@ -3041,6 +3071,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
 struct thread_params {
 	pthread_t thread;
 	struct object_entry **list;
+	struct packing_region *regions;
 	unsigned list_size;
 	unsigned remaining;
 	int window;
@@ -3273,16 +3304,252 @@ static void add_tag_chain(const struct object_id *oid)
 	}
 }
 
-static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid,
-		       int flag UNUSED, void *cb_data UNUSED)
+static int add_ref_tag(const struct reference *ref, void *cb_data UNUSED)
 {
 	struct object_id peeled;
 
-	if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled))
-		add_tag_chain(oid);
+	if (!reference_get_peeled_oid(the_repository, ref, &peeled) &&
+	    obj_is_packed(&peeled))
+		add_tag_chain(ref->oid);
 	return 0;
 }
 
+static int should_attempt_deltas(struct object_entry *entry)
+{
+	if (DELTA(entry))
+		/* This happens if we decided to reuse existing
+		 * delta from a pack.  "reuse_delta &&" is implied.
+		 */
+		return 0;
+
+	if (!entry->type_valid ||
+	    oe_size_less_than(&to_pack, entry, 50))
+		return 0;
+
+	if (entry->no_try_delta)
+		return 0;
+
+	if (!entry->preferred_base) {
+		if (oe_type(entry) < 0)
+			die(_("unable to get type of object %s"),
+			    oid_to_hex(&entry->idx.oid));
+	} else if (oe_type(entry) < 0) {
+		/*
+		 * This object is not found, but we
+		 * don't have to include it anyway.
+		 */
+		return 0;
+	}
+
+	return 1;
+}
+
+static void find_deltas_for_region(struct object_entry *list,
+				   struct packing_region *region,
+				   unsigned int *processed)
+{
+	struct object_entry **delta_list;
+	unsigned int delta_list_nr = 0;
+
+	ALLOC_ARRAY(delta_list, region->nr);
+	for (size_t i = 0; i < region->nr; i++) {
+		struct object_entry *entry = list + region->start + i;
+		if (should_attempt_deltas(entry))
+			delta_list[delta_list_nr++] = entry;
+	}
+
+	QSORT(delta_list, delta_list_nr, type_size_sort);
+	find_deltas(delta_list, &delta_list_nr, window, depth, processed);
+	free(delta_list);
+}
+
+static void find_deltas_by_region(struct object_entry *list,
+				  struct packing_region *regions,
+				  size_t start, size_t nr)
+{
+	unsigned int processed = 0;
+	size_t progress_nr;
+
+	if (!nr)
+		return;
+
+	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
+
+	if (progress)
+		progress_state = start_progress(the_repository,
+						_("Compressing objects by path"),
+						progress_nr);
+
+	while (nr--)
+		find_deltas_for_region(list,
+				       &regions[start++],
+				       &processed);
+
+	display_progress(progress_state, progress_nr);
+	stop_progress(&progress_state);
+}
+
+static void *threaded_find_deltas_by_path(void *arg)
+{
+	struct thread_params *me = arg;
+
+	progress_lock();
+	while (me->remaining) {
+		while (me->remaining) {
+			progress_unlock();
+			find_deltas_for_region(to_pack.objects,
+					       me->regions,
+					       me->processed);
+			progress_lock();
+			me->remaining--;
+			me->regions++;
+		}
+
+		me->working = 0;
+		pthread_cond_signal(&progress_cond);
+		progress_unlock();
+
+		/*
+		 * We must not set ->data_ready before we wait on the
+		 * condition because the main thread may have set it to 1
+		 * before we get here.  In order to be sure that new
+		 * work is available if we see 1 in ->data_ready, it
+		 * was initialized to 0 before this thread was spawned
+		 * and we reset it to 0 right away.
+		 */
+		pthread_mutex_lock(&me->mutex);
+		while (!me->data_ready)
+			pthread_cond_wait(&me->cond, &me->mutex);
+		me->data_ready = 0;
+		pthread_mutex_unlock(&me->mutex);
+
+		progress_lock();
+	}
+	progress_unlock();
+	/* leave ->working 1 so that this doesn't get more work assigned */
+	return NULL;
+}
+
+static void ll_find_deltas_by_region(struct object_entry *list,
+				     struct packing_region *regions,
+				     uint32_t start, uint32_t nr)
+{
+	struct thread_params *p;
+	int i, ret, active_threads = 0;
+	unsigned int processed = 0;
+	uint32_t progress_nr;
+	init_threaded_search();
+
+	if (!nr)
+		return;
+
+	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
+	if (delta_search_threads <= 1) {
+		find_deltas_by_region(list, regions, start, nr);
+		cleanup_threaded_search();
+		return;
+	}
+
+	if (progress > pack_to_stdout)
+		fprintf_ln(stderr,
+			   Q_("Path-based delta compression using up to %d thread",
+			      "Path-based delta compression using up to %d threads",
+			      delta_search_threads),
+			   delta_search_threads);
+	CALLOC_ARRAY(p, delta_search_threads);
+
+	if (progress)
+		progress_state = start_progress(the_repository,
+						_("Compressing objects by path"),
+						progress_nr);
+	/* Partition the work amongst work threads. */
+	for (i = 0; i < delta_search_threads; i++) {
+		unsigned sub_size = nr / (delta_search_threads - i);
+
+		p[i].window = window;
+		p[i].depth = depth;
+		p[i].processed = &processed;
+		p[i].working = 1;
+		p[i].data_ready = 0;
+
+		p[i].regions = regions;
+		p[i].list_size = sub_size;
+		p[i].remaining = sub_size;
+
+		regions += sub_size;
+		nr -= sub_size;
+	}
+
+	/* Start work threads. */
+	for (i = 0; i < delta_search_threads; i++) {
+		if (!p[i].list_size)
+			continue;
+		pthread_mutex_init(&p[i].mutex, NULL);
+		pthread_cond_init(&p[i].cond, NULL);
+		ret = pthread_create(&p[i].thread, NULL,
+				     threaded_find_deltas_by_path, &p[i]);
+		if (ret)
+			die(_("unable to create thread: %s"), strerror(ret));
+		active_threads++;
+	}
+
+	/*
+	 * Now let's wait for work completion.  Each time a thread is done
+	 * with its work, we steal half of the remaining work from the
+	 * thread with the largest number of unprocessed objects and give
+	 * it to that newly idle thread.  This ensures good load balancing
+	 * until the remaining object list segments are simply too short
+	 * to be worth splitting anymore.
+	 */
+	while (active_threads) {
+		struct thread_params *target = NULL;
+		struct thread_params *victim = NULL;
+		unsigned sub_size = 0;
+
+		progress_lock();
+		for (;;) {
+			for (i = 0; !target && i < delta_search_threads; i++)
+				if (!p[i].working)
+					target = &p[i];
+			if (target)
+				break;
+			pthread_cond_wait(&progress_cond, &progress_mutex);
+		}
+
+		for (i = 0; i < delta_search_threads; i++)
+			if (p[i].remaining > 2*window &&
+			    (!victim || victim->remaining < p[i].remaining))
+				victim = &p[i];
+		if (victim) {
+			sub_size = victim->remaining / 2;
+			target->regions = victim->regions + victim->remaining - sub_size;
+			victim->list_size -= sub_size;
+			victim->remaining -= sub_size;
+		}
+		target->list_size = sub_size;
+		target->remaining = sub_size;
+		target->working = 1;
+		progress_unlock();
+
+		pthread_mutex_lock(&target->mutex);
+		target->data_ready = 1;
+		pthread_cond_signal(&target->cond);
+		pthread_mutex_unlock(&target->mutex);
+
+		if (!sub_size) {
+			pthread_join(target->thread, NULL);
+			pthread_cond_destroy(&target->cond);
+			pthread_mutex_destroy(&target->mutex);
+			active_threads--;
+		}
+	}
+	cleanup_threaded_search();
+	free(p);
+
+	display_progress(progress_state, progress_nr);
+	stop_progress(&progress_state);
+}
+
 static void prepare_pack(int window, int depth)
 {
 	struct object_entry **delta_list;
@@ -3307,39 +3574,21 @@ static void prepare_pack(int window, int depth)
 	if (!to_pack.nr_objects || !window || !depth)
 		return;
 
+	if (path_walk)
+		ll_find_deltas_by_region(to_pack.objects, to_pack.regions,
+					 0, to_pack.nr_regions);
+
 	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
 	nr_deltas = n = 0;
 
 	for (i = 0; i < to_pack.nr_objects; i++) {
 		struct object_entry *entry = to_pack.objects + i;
 
-		if (DELTA(entry))
-			/* This happens if we decided to reuse existing
-			 * delta from a pack.  "reuse_delta &&" is implied.
-			 */
-			continue;
-
-		if (!entry->type_valid ||
-		    oe_size_less_than(&to_pack, entry, 50))
-			continue;
-
-		if (entry->no_try_delta)
+		if (!should_attempt_deltas(entry))
 			continue;
 
-		if (!entry->preferred_base) {
+		if (!entry->preferred_base)
 			nr_deltas++;
-			if (oe_type(entry) < 0)
-				die(_("unable to get type of object %s"),
-				    oid_to_hex(&entry->idx.oid));
-		} else {
-			if (oe_type(entry) < 0) {
-				/*
-				 * This object is not found, but we
-				 * don't have to include it anyway.
-				 */
-				continue;
-			}
-		}
 
 		delta_list[n++] = entry;
 	}
@@ -3494,7 +3743,6 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 		return 0;
 
 	if (p) {
-		struct rev_info *revs = _data;
 		struct object_info oi = OBJECT_INFO_INIT;
 
 		oi.typep = &type;
@@ -3502,6 +3750,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 			die(_("could not get type of object %s in pack %s"),
 			    oid_to_hex(oid), p->pack_name);
 	} else if (type == OBJ_COMMIT) {
+		struct rev_info *revs = _data;
 		/*
 		 * commits in included packs are used as starting points for the
 		 * subsequent revision walk
@@ -3517,32 +3766,48 @@ static int add_object_entry_from_pack(const struct object_id *oid,
 	return 0;
 }
 
-static void show_commit_pack_hint(struct commit *commit UNUSED,
-				  void *data UNUSED)
+static void show_object_pack_hint(struct object *object, const char *name,
+				  void *data)
 {
-	/* nothing to do; commits don't have a namehash */
+	enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
+	if (mode == STDIN_PACKS_MODE_FOLLOW) {
+		if (object->type == OBJ_BLOB &&
+		    !odb_has_object(the_repository->objects, &object->oid, 0))
+			return;
+		add_object_entry(&object->oid, object->type, name, 0);
+	} else {
+		struct object_entry *oe = packlist_find(&to_pack, &object->oid);
+		if (!oe)
+			return;
+
+		/*
+		 * Our 'to_pack' list was constructed by iterating all
+		 * objects packed in included packs, and so doesn't have
+		 * a non-zero hash field that you would typically pick
+		 * up during a reachability traversal.
+		 *
+		 * Make a best-effort attempt to fill in the ->hash and
+		 * ->no_try_delta fields here in order to perhaps
+		 * improve the delta selection process.
+		 */
+		oe->hash = pack_name_hash_fn(name);
+		oe->no_try_delta = name && no_try_delta(name);
+
+		stdin_packs_hints_nr++;
+	}
 }
 
-static void show_object_pack_hint(struct object *object, const char *name,
-				  void *data UNUSED)
+static void show_commit_pack_hint(struct commit *commit, void *data)
 {
-	struct object_entry *oe = packlist_find(&to_pack, &object->oid);
-	if (!oe)
+	enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
+
+	if (mode == STDIN_PACKS_MODE_FOLLOW) {
+		show_object_pack_hint((struct object *)commit, "", data);
 		return;
+	}
 
-	/*
-	 * Our 'to_pack' list was constructed by iterating all objects packed in
-	 * included packs, and so doesn't have a non-zero hash field that you
-	 * would typically pick up during a reachability traversal.
-	 *
-	 * Make a best-effort attempt to fill in the ->hash and ->no_try_delta
-	 * here using a now in order to perhaps improve the delta selection
-	 * process.
-	 */
-	oe->hash = pack_name_hash_fn(name);
-	oe->no_try_delta = name && no_try_delta(name);
-
-	stdin_packs_hints_nr++;
+	/* nothing to do; commits don't have a namehash */
 }
 
 static int pack_mtime_cmp(const void *_a, const void *_b)
@@ -3562,32 +3827,13 @@ static int pack_mtime_cmp(const void *_a, const void *_b)
 	return 0;
 }
 
-static void read_packs_list_from_stdin(void)
+static void read_packs_list_from_stdin(struct rev_info *revs)
 {
 	struct strbuf buf = STRBUF_INIT;
 	struct string_list include_packs = STRING_LIST_INIT_DUP;
 	struct string_list exclude_packs = STRING_LIST_INIT_DUP;
 	struct string_list_item *item = NULL;
-	struct packed_git *p;
-	struct rev_info revs;
-
-	repo_init_revisions(the_repository, &revs, NULL);
-	/*
-	 * Use a revision walk to fill in the namehash of objects in the include
-	 * packs. To save time, we'll avoid traversing through objects that are
-	 * in excluded packs.
-	 *
-	 * That may cause us to avoid populating all of the namehash fields of
-	 * all included objects, but our goal is best-effort, since this is only
-	 * an optimization during delta selection.
-	 */
-	revs.no_kept_objects = 1;
-	revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
-	revs.blob_objects = 1;
-	revs.tree_objects = 1;
-	revs.tag_objects = 1;
-	revs.ignore_missing_links = 1;
 
 	while (strbuf_getline(&buf, stdin) != EOF) {
 		if (!buf.len)
@@ -3606,7 +3852,7 @@ static void read_packs_list_from_stdin(void)
 	string_list_sort(&exclude_packs);
 	string_list_remove_duplicates(&exclude_packs, 0);
 
-	for (p = get_all_packs(the_repository); p; p = p->next) {
+	repo_for_each_pack(the_repository, p) {
 		const char *pack_name = pack_basename(p);
 
 		if ((item = string_list_lookup(&include_packs, pack_name)))
@@ -3657,25 +3903,55 @@ static void read_packs_list_from_stdin(void)
 		struct packed_git *p = item->util;
 		for_each_object_in_pack(p,
 					add_object_entry_from_pack,
-					&revs,
+					revs,
 					FOR_EACH_OBJECT_PACK_ORDER);
 	}
 
+	strbuf_release(&buf);
+	string_list_clear(&include_packs, 0);
+	string_list_clear(&exclude_packs, 0);
+}
+
+static void add_unreachable_loose_objects(struct rev_info *revs);
+
+static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
+{
+	struct rev_info revs;
+
+	repo_init_revisions(the_repository, &revs, NULL);
+	/*
+	 * Use a revision walk to fill in the namehash of objects in the include
+	 * packs. To save time, we'll avoid traversing through objects that are
+	 * in excluded packs.
+	 *
+	 * That may cause us to avoid populating all of the namehash fields of
+	 * all included objects, but our goal is best-effort, since this is only
+	 * an optimization during delta selection.
+	 */
+	revs.no_kept_objects = 1;
+	revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
+	revs.blob_objects = 1;
+	revs.tree_objects = 1;
+	revs.tag_objects = 1;
+	revs.ignore_missing_links = 1;
+
+	/* avoids adding objects in excluded packs */
+	ignore_packed_keep_in_core = 1;
+
+	read_packs_list_from_stdin(&revs);
+	if (rev_list_unpacked)
+		add_unreachable_loose_objects(&revs);
+
 	if (prepare_revision_walk(&revs))
 		die(_("revision walk setup failed"));
 	traverse_commit_list(&revs,
 			     show_commit_pack_hint,
 			     show_object_pack_hint,
-			     NULL);
+			     &mode);
 
 	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
 			   stdin_packs_found_nr);
 	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
 			   stdin_packs_hints_nr);
-
-	strbuf_release(&buf);
-	string_list_clear(&include_packs, 0);
-	string_list_clear(&exclude_packs, 0);
 }
 
@@ -3695,7 +3971,14 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
 	} else {
 		if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime))
 			return;
-		if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
+		if (!pack && type == OBJ_BLOB) {
+			struct odb_source *source = the_repository->objects->sources;
+			int found = 0;
+
+			for (; !found && source; source = source->next)
+				if (odb_source_loose_has_object(source, oid))
+					found = 1;
+
 			/*
 			 * If a traversed tree has a missing blob then we want
 			 * to avoid adding that missing object to our pack.
@@ -3709,7 +3992,8 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
 			 * limited to "ensure non-tip blobs which don't exist in
 			 * packs do exist via loose objects". Confused?
 			 */
-			return;
+			if (!found)
+				return;
 		}
 
 		entry = create_object_entry(oid, type, pack_name_hash_fn(name),
@@ -3773,7 +4057,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
 	}
 }
 
-static void add_unreachable_loose_objects(void);
 static void add_objects_in_unpacked_packs(void);
 
 static void enumerate_cruft_objects(void)
@@ -3783,7 +4066,7 @@ static void enumerate_cruft_objects(void)
 						_("Enumerating cruft objects"), 0);
 
 	add_objects_in_unpacked_packs();
-	add_unreachable_loose_objects();
+	add_unreachable_loose_objects(NULL);
 
 	stop_progress(&progress_state);
 }
@@ -3819,7 +4102,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs
 	 * Re-mark only the fresh packs as kept so that objects in
 	 * unknown packs do not halt the reachability traversal early.
	 */
-	for (p = get_all_packs(the_repository); p; p = p->next)
+	repo_for_each_pack(the_repository, p)
 		p->pack_keep_in_core = 0;
 
 	mark_pack_kept_in_core(fresh_packs, 1);
@@ -3856,7 +4139,7 @@ static void read_cruft_objects(void)
 	string_list_sort(&discard_packs);
 	string_list_sort(&fresh_packs);
 
-	for (p = get_all_packs(the_repository); p; p = p->next) {
+	repo_for_each_pack(the_repository, p) {
 		const char *pack_name = pack_basename(p);
 		struct string_list_item *item;
 
@@ -3966,7 +4249,7 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void
 	 * Quietly ignore ALL missing objects.  This avoids problems with
	 * staging them now and getting an odd error later.
	 */
-	if (!has_object(the_repository, &obj->oid, 0))
+	if (!odb_has_object(the_repository->objects, &obj->oid, 0))
		return;
 
	show_object(obj, name, data);
@@ -3980,7 +4263,7 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
	 * Quietly ignore EXPECTED missing objects.  This avoids problems with
	 * staging them now and getting an odd error later.
	 */
-	if (!has_object(the_repository, &obj->oid, 0) &&
+	if (!odb_has_object(the_repository->objects, &obj->oid, 0) &&
	    is_promisor_object(to_pack.repo, &obj->oid))
		return;
 
@@ -4061,9 +4344,10 @@ static void add_objects_in_unpacked_packs(void)
 }
 
 static int add_loose_object(const struct object_id *oid, const char *path,
-			    void *data UNUSED)
+			    void *data)
 {
-	enum object_type type = oid_object_info(the_repository, oid, NULL);
+	struct rev_info *revs = data;
+	enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL);
 
	if (type < 0) {
		warning(_("loose object at %s could not be examined"), path);
@@ -4083,6 +4367,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
	} else {
		add_object_entry(oid, type, "", 0);
	}
+
+	if (revs && type == OBJ_COMMIT)
+		add_pending_oid(revs, NULL, oid, 0);
+
	return 0;
 }
 
@@ -4091,35 +4379,35 @@ static int add_loose_object(const struct object_id *oid, const char *path,
  * add_object_entry will weed out duplicates, so we just add every
  * loose object we find.
  */
-static void add_unreachable_loose_objects(void)
+static void add_unreachable_loose_objects(struct rev_info *revs)
 {
-	for_each_loose_file_in_objdir(repo_get_object_directory(the_repository),
-				      add_loose_object,
-				      NULL, NULL, NULL);
+	for_each_loose_file_in_source(the_repository->objects->sources,
+				      add_loose_object, NULL, NULL, revs);
 }
 
 static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
 {
-	static struct packed_git *last_found = (void *)1;
+	static struct packed_git *last_found = NULL;
	struct packed_git *p;
 
-	p = (last_found != (void *)1) ? last_found :
-					get_all_packs(the_repository);
+	if (last_found && find_pack_entry_one(oid, last_found))
+		return 1;
+
+	repo_for_each_pack(the_repository, p) {
+		/*
+		 * We have already checked `last_found`, so there is no need to
+		 * re-check here.
+		 */
+		if (p == last_found)
+			continue;
 
-	while (p) {
-		if ((!p->pack_local || p->pack_keep ||
-		     p->pack_keep_in_core) &&
-		    find_pack_entry_one(oid, p)) {
+		if ((!p->pack_local || p->pack_keep || p->pack_keep_in_core) &&
+		    find_pack_entry_one(oid, p)) {
			last_found = p;
			return 1;
		}
-		if (p == last_found)
-			p = get_all_packs(the_repository);
-		else
-			p = p->next;
-		if (p == last_found)
-			p = p->next;
	}
+
	return 0;
 }
 
@@ -4151,7 +4439,7 @@ static void loosen_unused_packed_objects(void)
	uint32_t loosened_objects_nr = 0;
	struct object_id oid;
 
-	for (p = get_all_packs(the_repository); p; p = p->next) {
+	repo_for_each_pack(the_repository, p) {
		if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
			continue;
 
@@ -4163,7 +4451,8 @@ static void loosen_unused_packed_objects(void)
			if (!packlist_find(&to_pack, &oid) &&
			    !has_sha1_pack_kept_or_nonlocal(&oid) &&
			    !loosened_object_can_be_discarded(&oid, p->mtime)) {
-				if (force_object_loose(&oid, p->mtime))
+				if (force_object_loose(the_repository->objects->sources,
+						       &oid, p->mtime))
					die(_("unable to force loose object"));
				loosened_objects_nr++;
			}
@@ -4237,19 +4526,16 @@ static void record_recent_commit(struct commit *commit, void *data UNUSED)
	oid_array_append(&recent_objects, &commit->object.oid);
 }
 
-static int mark_bitmap_preferred_tip(const char *refname,
-				     const char *referent UNUSED,
-				     const struct object_id *oid,
-				     int flags UNUSED,
-				     void *data UNUSED)
+static int mark_bitmap_preferred_tip(const struct reference *ref, void *data UNUSED)
 {
+	const struct object_id *maybe_peeled = ref->oid;
	struct object_id peeled;
	struct object *object;
 
-	if (!peel_iterated_oid(the_repository, oid, &peeled))
-		oid = &peeled;
+	if (!reference_get_peeled_oid(the_repository, ref, &peeled))
+		maybe_peeled = &peeled;
 
-	object = parse_object_or_die(the_repository, oid, refname);
+	object = parse_object_or_die(the_repository, maybe_peeled, ref->name);
	if (object->type == OBJ_COMMIT)
		object->flags |= NEEDS_BITMAP;
 
@@ -4272,7 +4558,94 @@ static void mark_bitmap_preferred_tips(void)
	}
 }
 
-static void get_object_list(struct rev_info *revs, int ac, const char **av)
+static inline int is_oid_uninteresting(struct repository *repo,
+				       struct object_id *oid)
+{
+	struct object *o = lookup_object(repo, oid);
+	return !o || (o->flags & UNINTERESTING);
+}
+
+static int add_objects_by_path(const char *path,
+			       struct oid_array *oids,
+			       enum object_type type,
+			       void *data)
+{
+	size_t oe_start = to_pack.nr_objects;
+	size_t oe_end;
+	unsigned int *processed = data;
+
+	/*
+	 * First, add all objects to the packing data, including the ones
+	 * marked UNINTERESTING (translated to 'exclude') as they can be
+	 * used as delta bases.
+	 */
+	for (size_t i = 0; i < oids->nr; i++) {
+		int exclude;
+		struct object_info oi = OBJECT_INFO_INIT;
+		struct object_id *oid = &oids->oid[i];
+
+		/* Skip objects that do not exist locally. */
+		if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) &&
+		    odb_read_object_info_extended(the_repository->objects, oid, &oi,
+						  OBJECT_INFO_FOR_PREFETCH) < 0)
+			continue;
+
+		exclude = is_oid_uninteresting(the_repository, oid);
+
+		if (exclude && !thin)
+			continue;
+
+		add_object_entry(oid, type, path, exclude);
+	}
+
+	oe_end = to_pack.nr_objects;
+
+	/* We can skip delta calculations if it is a no-op. */
+	if (oe_end == oe_start || !window)
+		return 0;
+
+	ALLOC_GROW(to_pack.regions,
+		   to_pack.nr_regions + 1,
+		   to_pack.nr_regions_alloc);
+
+	to_pack.regions[to_pack.nr_regions].start = oe_start;
+	to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
+	to_pack.nr_regions++;
+
+	*processed += oids->nr;
+	display_progress(progress_state, *processed);
+
+	return 0;
+}
+
+static void get_object_list_path_walk(struct rev_info *revs)
+{
+	struct path_walk_info info = PATH_WALK_INFO_INIT;
+	unsigned int processed = 0;
+	int result;
+
+	info.revs = revs;
+	info.path_fn = add_objects_by_path;
+	info.path_fn_data = &processed;
+
+	/*
+	 * Allow the --[no-]sparse option to be interesting here, if only
+	 * for testing purposes. Paths with no interesting objects will not
+	 * contribute to the resulting pack, but only create noisy preferred
+	 * base objects.
+	 */
+	info.prune_all_uninteresting = sparse;
+	info.edge_aggressive = shallow;
+
+	trace2_region_enter("pack-objects", "path-walk", revs->repo);
+	result = walk_objects_by_path(&info);
+	trace2_region_leave("pack-objects", "path-walk", revs->repo);
+
+	if (result)
+		die(_("failed to pack objects via path-walk"));
+}
+
+static void get_object_list(struct rev_info *revs, struct strvec *argv)
 {
	struct setup_revision_opt s_r_opt = {
		.allow_exclude_promisor_objects = 1,
@@ -4282,7 +4655,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
	int save_warning;
 
	save_commit_buffer = 0;
-	setup_revisions(ac, av, revs, &s_r_opt);
+	setup_revisions_from_strvec(argv, revs, &s_r_opt);
 
	/* make sure shallows are read */
	is_repository_shallow(the_repository);
@@ -4327,15 +4700,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
	if (write_bitmap_index)
		mark_bitmap_preferred_tips();
 
-	if (prepare_revision_walk(revs))
-		die(_("revision walk setup failed"));
-	mark_edges_uninteresting(revs, show_edge, sparse);
-
	if (!fn_show_object)
		fn_show_object = show_object;
-	traverse_commit_list(revs,
-			     show_commit, fn_show_object,
-			     NULL);
+
+	if (path_walk) {
+		get_object_list_path_walk(revs);
+	} else {
+		if (prepare_revision_walk(revs))
+			die(_("revision walk setup failed"));
+		mark_edges_uninteresting(revs, show_edge, sparse);
+		traverse_commit_list(revs,
+				     show_commit, fn_show_object,
+				     NULL);
+	}
 
	if (unpack_unreachable_expiration) {
		revs->ignore_missing_links = 1;
@@ -4351,7 +4728,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
	if (keep_unreachable)
		add_objects_in_unpacked_packs();
	if (pack_loose_unreachable)
-		add_unreachable_loose_objects();
+		add_unreachable_loose_objects(NULL);
	if (unpack_unreachable)
		loosen_unused_packed_objects();
 
@@ -4365,7 +4742,7 @@ static void add_extra_kept_packs(const struct string_list *names)
	if (!names->nr)
		return;
 
-	for (p = get_all_packs(the_repository); p; p = p->next) {
+	repo_for_each_pack(the_repository, p) {
		const char *name = basename(p->pack_name);
		int i;
 
@@ -4449,7 +4826,7 @@ static int option_parse_cruft_expiration(const struct option *opt UNUSED,
 static int is_not_in_promisor_pack_obj(struct object *obj, void *data UNUSED)
 {
	struct object_info info = OBJECT_INFO_INIT;
-	if (oid_object_info_extended(the_repository, &obj->oid, &info, 0))
+	if (odb_read_object_info_extended(the_repository->objects, &obj->oid, &info, 0))
		BUG("should_include_obj should only be called on existing objects");
	return info.whence != OI_PACKED || !info.u.packed.pack->pack_promisor;
 }
@@ -4458,18 +4835,34 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data)
 {
	return is_not_in_promisor_pack_obj((struct object *) commit, data);
 }
 
+static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
+				  int unset)
+{
+	enum stdin_packs_mode *mode = opt->value;
+
+	if (unset)
+		*mode = STDIN_PACKS_MODE_NONE;
+	else if (!arg || !*arg)
+		*mode = STDIN_PACKS_MODE_STANDARD;
+	else if (!strcmp(arg, "follow"))
+		*mode = STDIN_PACKS_MODE_FOLLOW;
+	else
+		die(_("invalid value for '%s': '%s'"), opt->long_name, arg);
+
+	return 0;
+}
+
 int cmd_pack_objects(int argc,
		     const char **argv,
		     const char *prefix,
		     struct repository *repo UNUSED)
 {
	int use_internal_rev_list = 0;
-	int shallow = 0;
	int all_progress_implied = 0;
	struct strvec rp = STRVEC_INIT;
	int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
	int rev_list_index = 0;
-	int stdin_packs = 0;
+	enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE;
	struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
	struct list_objects_filter_options filter_options =
		LIST_OBJECTS_FILTER_INIT;
@@ -4524,8 +4917,9 @@ int cmd_pack_objects(int argc,
		OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
			      N_("include objects referred to by the index"),
			      1, PARSE_OPT_NONEG),
+		OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"),
+			       N_("read packs from stdin"),
+			       PARSE_OPT_OPTARG, parse_stdin_packs_mode),
-		OPT_BOOL(0, "stdin-packs", &stdin_packs,
-			 N_("read packs from stdin")),
		OPT_BOOL(0, "stdout", &pack_to_stdout,
			 N_("output pack to stdout")),
@@ -4545,6 +4941,8 @@ int cmd_pack_objects(int argc,
			    N_("use the sparse reachability algorithm")),
		OPT_BOOL(0, "thin", &thin,
			 N_("create thin packs")),
+		OPT_BOOL(0, "path-walk", &path_walk,
+			 N_("use the path-walk API to walk objects when possible")),
		OPT_BOOL(0, "shallow", &shallow,
			 N_("create packs suitable for shallow fetches")),
		OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
@@ -4599,7 +4997,7 @@ int cmd_pack_objects(int argc,
	reset_pack_idx_option(&pack_idx_opts);
	pack_idx_opts.flags |= WRITE_REV;
 
-	git_config(git_pack_config, NULL);
+	repo_config(the_repository, git_pack_config, NULL);
	if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0))
		pack_idx_opts.flags &= ~WRITE_REV;
 
@@ -4614,6 +5012,17 @@ int cmd_pack_objects(int argc,
	if (pack_to_stdout != !base_name || argc)
		usage_with_options(pack_usage, pack_objects_options);
 
+	if (path_walk < 0) {
+		if (use_bitmap_index > 0 ||
+		    !use_internal_rev_list)
+			path_walk = 0;
+		else if (the_repository->gitdir &&
+			 the_repository->settings.pack_use_path_walk)
+			path_walk = 1;
+		else
+			path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0);
+	}
+
	if (depth < 0)
		depth = 0;
	if (depth >= (1 << OE_DEPTH_BITS)) {
@@ -4630,7 +5039,28 @@ int cmd_pack_objects(int argc,
		window = 0;
 
	strvec_push(&rp, "pack-objects");
-	if (thin) {
+
+	if (path_walk) {
+		const char *option = NULL;
+		if (filter_options.choice)
+			option = "--filter";
+		else if (use_delta_islands)
+			option = "--delta-islands";
+
+		if (option) {
+			warning(_("cannot use %s with %s"),
+				option, "--path-walk");
+			path_walk = 0;
+		}
+	}
+	if (path_walk) {
+		strvec_push(&rp, "--boundary");
+		/*
+		 * We must disable the bitmaps because we are removing
+		 * the --objects / --objects-edge[-aggressive] options.
+		 */
+		use_bitmap_index = 0;
+	} else if (thin) {
		use_internal_rev_list = 1;
		strvec_push(&rp, shallow
			    ? "--objects-edge-aggressive" : "--objects-edge");
@@ -4655,9 +5085,10 @@ int cmd_pack_objects(int argc,
		strvec_push(&rp, "--unpacked");
	}
 
-	if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
-		die(_("options '%s' and '%s' cannot be used together"),
-		    "--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
+	die_for_incompatible_opt2(exclude_promisor_objects,
+				  "--exclude-promisor-objects",
+				  exclude_promisor_objects_best_effort,
+				  "--exclude-promisor-objects-best-effort");
	if (exclude_promisor_objects) {
		use_internal_rev_list = 1;
		fetch_if_missing = 0;
@@ -4695,13 +5126,14 @@ int cmd_pack_objects(int argc,
	if (!pack_to_stdout && thin)
		die(_("--thin cannot be used to build an indexable pack"));
 
-	if (keep_unreachable && unpack_unreachable)
-		die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
+	die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable",
+				  unpack_unreachable, "--unpack-unreachable");
	if (!rev_list_all || !rev_list_reflog || !rev_list_index)
		unpack_unreachable_expiration = 0;
 
-	if (stdin_packs && filter_options.choice)
-		die(_("cannot use --filter with --stdin-packs"));
+	die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
+				  filter_options.choice, "--filter");
+
	if (stdin_packs && use_internal_rev_list)
		die(_("cannot use internal rev list with --stdin-packs"));
 
@@ -4709,8 +5141,8 @@ int cmd_pack_objects(int argc,
	if (cruft) {
		if (use_internal_rev_list)
			die(_("cannot use internal rev list with --cruft"));
-		if (stdin_packs)
-			die(_("cannot use --stdin-packs with --cruft"));
+		die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
+					  cruft, "--cruft");
	}
 
	/*
@@ -4749,7 +5181,8 @@ int cmd_pack_objects(int argc,
	add_extra_kept_packs(&keep_pack_list);
	if (ignore_packed_keep_on_disk) {
		struct packed_git *p;
-		for (p = get_all_packs(the_repository); p; p = p->next)
+
+		repo_for_each_pack(the_repository, p)
			if (p->pack_local && p->pack_keep)
				break;
		if (!p) /* no keep-able packs found */
@@ -4762,7 +5195,8 @@ int cmd_pack_objects(int argc,
		 * it also covers non-local objects
		 */
		struct packed_git *p;
-		for (p = get_all_packs(the_repository); p; p = p->next) {
+
+		repo_for_each_pack(the_repository, p) {
			if (!p->pack_local) {
				have_non_local_packs = 1;
				break;
@@ -4778,11 +5212,7 @@ int cmd_pack_objects(int argc,
		progress_state = start_progress(the_repository,
						_("Enumerating objects"), 0);
	if (stdin_packs) {
-		/* avoids adding objects in excluded packs */
-		ignore_packed_keep_in_core = 1;
-		read_packs_list_from_stdin();
-		if (rev_list_unpacked)
-			add_unreachable_loose_objects();
+		read_stdin_packs(stdin_packs, rev_list_unpacked);
	} else if (cruft) {
		read_cruft_objects();
	} else if (!use_internal_rev_list) {
@@ -4796,7 +5226,7 @@ int cmd_pack_objects(int argc,
			revs.include_check = is_not_in_promisor_pack;
			revs.include_check_obj = is_not_in_promisor_pack_obj;
		}
-		get_object_list(&revs, rp.nr, rp.v);
+		get_object_list(&revs, &rp);
		release_revisions(&revs);
	}
	cleanup_preferred_base();
