diff options
Diffstat (limited to 'builtin/index-pack.c')
| -rw-r--r-- | builtin/index-pack.c | 193 |
1 files changed, 173 insertions, 20 deletions
diff --git a/builtin/index-pack.c b/builtin/index-pack.c index e228c56ff2..0b62b2589f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1,4 +1,6 @@ #define USE_THE_REPOSITORY_VARIABLE +#define DISABLE_SIGN_COMPARE_WARNINGS + #include "builtin.h" #include "config.h" #include "delta.h" @@ -9,6 +11,7 @@ #include "csum-file.h" #include "blob.h" #include "commit.h" +#include "tag.h" #include "tree.h" #include "progress.h" #include "fsck.h" @@ -20,9 +23,14 @@ #include "object-file.h" #include "object-store-ll.h" #include "oid-array.h" +#include "oidset.h" +#include "path.h" #include "replace-object.h" +#include "tree-walk.h" #include "promisor-remote.h" +#include "run-command.h" #include "setup.h" +#include "strvec.h" static const char index_pack_usage[] = "git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-id>=<severity>...]] [--fsck-objects[=<msg-id>=<severity>...]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])"; @@ -94,7 +102,7 @@ static LIST_HEAD(done_head); static size_t base_cache_used; static size_t base_cache_limit; -struct thread_local { +struct thread_local_data { pthread_t thread; int pack_fd; }; @@ -117,7 +125,7 @@ static struct object_entry *objects; static struct object_stat *obj_stat; static struct ofs_delta_entry *ofs_deltas; static struct ref_delta_entry *ref_deltas; -static struct thread_local nothread_data; +static struct thread_local_data nothread_data; static int nr_objects; static int nr_ofs_deltas; static int nr_ref_deltas; @@ -148,7 +156,14 @@ static uint32_t input_crc32; static int input_fd, output_fd; static const char *curr_pack; -static struct thread_local *thread_data; +/* + * outgoing_links is guarded by read_mutex, and record_outgoing_links is + * read-only in a thread. + */ +static struct oidset outgoing_links = OIDSET_INIT; +static int record_outgoing_links; + +static struct thread_local_data *thread_data; static int nr_dispatched; static int threads_active; @@ -390,7 +405,7 @@ static NORETURN void bad_object(off_t offset, const char *format, ...) (uintmax_t)offset, buf); } -static inline struct thread_local *get_thread_data(void) +static inline struct thread_local_data *get_thread_data(void) { if (HAVE_THREADS) { if (threads_active) @@ -401,7 +416,7 @@ static inline struct thread_local *get_thread_data(void) return ¬hread_data; } -static void set_thread_data(struct thread_local *data) +static void set_thread_data(struct thread_local_data *data) { if (threads_active) pthread_setspecific(key, data); @@ -799,6 +814,68 @@ static int check_collison(struct object_entry *entry) return 0; } +static void record_outgoing_link(const struct object_id *oid) +{ + oidset_insert(&outgoing_links, oid); +} + +static void maybe_record_name_entry(const struct name_entry *entry) +{ + /* + * Checking only trees here results in a significantly faster packfile + * indexing, but the drawback is that if the packfile to be indexed + * references a local blob only directly (that is, never through a + * local tree), that local blob is in danger of being garbage + * collected. Such a situation may arise if we push local commits, + * including one with a change to a blob in the root tree, and then the + * server incorporates them into its main branch through a "rebase" or + * "squash" merge strategy, and then we fetch the new main branch from + * the server. + * + * This situation has not been observed yet - we have only noticed + * missing commits, not missing trees or blobs. (In fact, if it were + * believed that only missing commits are problematic, one could argue + * that we should also exclude trees during the outgoing link check; + * but it is safer to include them.) + * + * Due to the rarity of the situation (it has not been observed to + * happen in real life), and because the "penalty" in such a situation + * is merely to refetch the missing blob when it's needed (and this + * happens only once - when refetched, the blob goes into a promisor + * pack, so it won't be GC-ed, the tradeoff seems worth it. + */ + if (S_ISDIR(entry->mode)) + record_outgoing_link(&entry->oid); +} + +static void do_record_outgoing_links(struct object *obj) +{ + if (obj->type == OBJ_TREE) { + struct tree *tree = (struct tree *)obj; + struct tree_desc desc; + struct name_entry entry; + if (init_tree_desc_gently(&desc, &tree->object.oid, + tree->buffer, tree->size, 0)) + /* + * Error messages are given when packs are + * verified, so do not print any here. + */ + return; + while (tree_entry_gently(&desc, &entry)) + maybe_record_name_entry(&entry); + } else if (obj->type == OBJ_COMMIT) { + struct commit *commit = (struct commit *) obj; + struct commit_list *parents = commit->parents; + + record_outgoing_link(get_commit_tree_oid(commit)); + for (; parents; parents = parents->next) + record_outgoing_link(&parents->item->object.oid); + } else if (obj->type == OBJ_TAG) { + struct tag *tag = (struct tag *) obj; + record_outgoing_link(get_tagged_oid(tag)); + } +} + static void sha1_object(const void *data, struct object_entry *obj_entry, unsigned long size, enum object_type type, const struct object_id *oid) @@ -845,7 +922,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, free(has_data); } - if (strict || do_fsck_object) { + if (strict || do_fsck_object || record_outgoing_links) { read_lock(); if (type == OBJ_BLOB) { struct blob *blob = lookup_blob(the_repository, oid); @@ -877,6 +954,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, die(_("fsck error in packed object")); if (strict && fsck_walk(obj, NULL, &fsck_options)) die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); + if (record_outgoing_links) + do_record_outgoing_links(obj); if (obj->type == OBJ_TREE) { struct tree *item = (struct tree *) obj; @@ -1238,7 +1317,7 @@ static void parse_pack_objects(unsigned char *hash) * recursively checking if the resulting object is used as a base * for some more deltas. */ -static void resolve_deltas(void) +static void resolve_deltas(struct pack_idx_option *opts) { int i; @@ -1254,10 +1333,9 @@ static void resolve_deltas(void) nr_ref_deltas + nr_ofs_deltas); nr_dispatched = 0; - base_cache_limit = delta_base_cache_limit * nr_threads; + base_cache_limit = opts->delta_base_cache_limit * nr_threads; if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) { init_thread(); - work_lock(); for (i = 0; i < nr_threads; i++) { int ret = pthread_create(&thread_data[i].thread, NULL, threaded_second_pass, thread_data + i); @@ -1265,7 +1343,6 @@ static void resolve_deltas(void) die(_("unable to create thread: %s"), strerror(ret)); } - work_unlock(); for (i = 0; i < nr_threads; i++) pthread_join(thread_data[i].thread, NULL); cleanup_thread(); @@ -1479,7 +1556,7 @@ static void write_special_file(const char *suffix, const char *msg, if (pack_name) filename = derive_filename(pack_name, "pack", suffix, &name_buf); else - filename = odb_pack_name(&name_buf, hash, suffix); + filename = odb_pack_name(the_repository, &name_buf, hash, suffix); fd = odb_pack_keep(filename); if (fd < 0) { @@ -1505,9 +1582,9 @@ static void rename_tmp_packfile(const char **final_name, struct strbuf *name, unsigned char *hash, const char *ext, int make_read_only_if_same) { - if (*final_name != curr_name) { + if (!*final_name || strcmp(*final_name, curr_name)) { if (!*final_name) - *final_name = odb_pack_name(name, hash, ext); + *final_name = odb_pack_name(the_repository, name, hash, ext); if (finalize_object_file(curr_name, *final_name)) die(_("unable to rename temporary '*.%s' file to '%s'"), ext, *final_name); @@ -1552,7 +1629,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name, if (do_fsck_object) { struct packed_git *p; - p = add_packed_git(final_index_name, strlen(final_index_name), 0); + p = add_packed_git(the_repository, final_index_name, + strlen(final_index_name), 0); if (p) install_packed_git(the_repository, p); } @@ -1603,6 +1681,10 @@ static int git_index_pack_config(const char *k, const char *v, else opts->flags &= ~WRITE_REV; } + if (!strcmp(k, "core.deltabasecachelimit")) { + opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi); + return 0; + } return git_default_config(k, v, ctx, cb); } @@ -1650,7 +1732,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p, static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) { - struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1); + struct packed_git *p = add_packed_git(the_repository, pack_name, + strlen(pack_name), 1); if (!p) die(_("Cannot open existing pack file '%s'"), pack_name); @@ -1719,6 +1802,73 @@ static void show_pack_info(int stat_only) free(chain_histogram); } +static void repack_local_links(void) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + struct oidset_iter iter; + struct object_id *oid; + char *base_name = NULL; + + if (!oidset_size(&outgoing_links)) + return; + + oidset_iter_init(&outgoing_links, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct object_info info = OBJECT_INFO_INIT; + if (oid_object_info_extended(the_repository, oid, &info, 0)) + /* Missing; assume it is a promisor object */ + continue; + if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor) + continue; + + if (!cmd.args.nr) { + base_name = mkpathdup( + "%s/pack/pack", + repo_get_object_directory(the_repository)); + strvec_push(&cmd.args, "pack-objects"); + strvec_push(&cmd.args, + "--exclude-promisor-objects-best-effort"); + strvec_push(&cmd.args, base_name); + cmd.git_cmd = 1; + cmd.in = -1; + cmd.out = -1; + if (start_command(&cmd)) + die(_("could not start pack-objects to repack local links")); + } + + if (write_in_full(cmd.in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || + write_in_full(cmd.in, "\n", 1) < 0) + die(_("failed to feed local object to pack-objects")); + } + + if (!cmd.args.nr) + return; + + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + unsigned char binary[GIT_MAX_RAWSZ]; + if (line.len != the_hash_algo->hexsz || + !hex_to_bytes(binary, line.buf, line.len)) + die(_("index-pack: Expecting full hex object ID lines only from pack-objects.")); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + */ + write_special_file("promisor", "", NULL, binary, NULL); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack local links")); + strbuf_release(&line); + free(base_name); +} + int cmd_index_pack(int argc, const char **argv, const char *prefix, @@ -1726,7 +1876,7 @@ int cmd_index_pack(int argc, { int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index; const char *curr_index; - const char *curr_rev_index = NULL; + char *curr_rev_index = NULL; const char *index_name = NULL, *pack_name = NULL, *rev_index_name = NULL; const char *keep_msg = NULL; const char *promisor_msg = NULL; @@ -1794,7 +1944,7 @@ int cmd_index_pack(int argc, } else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) { ; /* nothing to do */ } else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) { - ; /* already parsed */ + record_outgoing_links = 1; } else if (starts_with(arg, "--threads=")) { char *end; nr_threads = strtoul(arg+10, &end, 0); @@ -1865,6 +2015,8 @@ int cmd_index_pack(int argc, usage(index_pack_usage); if (fix_thin_pack && !from_stdin) die(_("the option '%s' requires '%s'"), "--fix-thin", "--stdin"); + if (promisor_msg && pack_name) + die(_("--promisor cannot be used with a pack name")); if (from_stdin && !startup_info->have_repository) die(_("--stdin requires a git repository")); if (from_stdin && hash_algo) @@ -1928,7 +2080,7 @@ int cmd_index_pack(int argc, parse_pack_objects(pack_hash); if (report_end_of_input) write_in_full(2, "\0", 1); - resolve_deltas(); + resolve_deltas(&opts); conclude_pack(fix_thin_pack, curr_pack, pack_hash); free(ofs_deltas); free(ref_deltas); @@ -1968,8 +2120,9 @@ int cmd_index_pack(int argc, free((void *) curr_pack); if (!index_name) free((void *) curr_index); - if (!rev_index_name) - free((void *) curr_rev_index); + free(curr_rev_index); + + repack_local_links(); /* * Let the caller know this pack is not self contained |
