summaryrefslogtreecommitdiff
path: root/builtin/index-pack.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin/index-pack.c')
-rw-r--r--builtin/index-pack.c193
1 files changed, 173 insertions, 20 deletions
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index e228c56ff2..0b62b2589f 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1,4 +1,6 @@
#define USE_THE_REPOSITORY_VARIABLE
+#define DISABLE_SIGN_COMPARE_WARNINGS
+
#include "builtin.h"
#include "config.h"
#include "delta.h"
@@ -9,6 +11,7 @@
#include "csum-file.h"
#include "blob.h"
#include "commit.h"
+#include "tag.h"
#include "tree.h"
#include "progress.h"
#include "fsck.h"
@@ -20,9 +23,14 @@
#include "object-file.h"
#include "object-store-ll.h"
#include "oid-array.h"
+#include "oidset.h"
+#include "path.h"
#include "replace-object.h"
+#include "tree-walk.h"
#include "promisor-remote.h"
+#include "run-command.h"
#include "setup.h"
+#include "strvec.h"
static const char index_pack_usage[] =
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-id>=<severity>...]] [--fsck-objects[=<msg-id>=<severity>...]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
@@ -94,7 +102,7 @@ static LIST_HEAD(done_head);
static size_t base_cache_used;
static size_t base_cache_limit;
-struct thread_local {
+struct thread_local_data {
pthread_t thread;
int pack_fd;
};
@@ -117,7 +125,7 @@ static struct object_entry *objects;
static struct object_stat *obj_stat;
static struct ofs_delta_entry *ofs_deltas;
static struct ref_delta_entry *ref_deltas;
-static struct thread_local nothread_data;
+static struct thread_local_data nothread_data;
static int nr_objects;
static int nr_ofs_deltas;
static int nr_ref_deltas;
@@ -148,7 +156,14 @@ static uint32_t input_crc32;
static int input_fd, output_fd;
static const char *curr_pack;
-static struct thread_local *thread_data;
+/*
+ * outgoing_links is guarded by read_mutex, and record_outgoing_links is
+ * read-only in a thread.
+ */
+static struct oidset outgoing_links = OIDSET_INIT;
+static int record_outgoing_links;
+
+static struct thread_local_data *thread_data;
static int nr_dispatched;
static int threads_active;
@@ -390,7 +405,7 @@ static NORETURN void bad_object(off_t offset, const char *format, ...)
(uintmax_t)offset, buf);
}
-static inline struct thread_local *get_thread_data(void)
+static inline struct thread_local_data *get_thread_data(void)
{
if (HAVE_THREADS) {
if (threads_active)
@@ -401,7 +416,7 @@ static inline struct thread_local *get_thread_data(void)
return &nothread_data;
}
-static void set_thread_data(struct thread_local *data)
+static void set_thread_data(struct thread_local_data *data)
{
if (threads_active)
pthread_setspecific(key, data);
@@ -799,6 +814,68 @@ static int check_collison(struct object_entry *entry)
return 0;
}
+/*
+ * Note that an object in the pack being indexed refers to `oid` by
+ * adding it to the file-scope `outgoing_links` set.
+ *
+ * `outgoing_links` is guarded by read_mutex, so callers are expected to
+ * hold the read lock when calling this.
+ */
+static void record_outgoing_link(const struct object_id *oid)
+{
+	oidset_insert(&outgoing_links, oid);
+}
+
+/*
+ * Record the object named by a tree entry as an outgoing link, but only
+ * when that entry is itself a tree (directory).
+ *
+ * Restricting the check to trees makes packfile indexing significantly
+ * faster. The drawback is that a local blob which the packfile being
+ * indexed references only directly (that is, never through a local
+ * tree) is not recorded and is in danger of being garbage collected.
+ * Such a situation may arise if we push local commits, including one
+ * with a change to a blob in the root tree, the server incorporates
+ * them into its main branch through a "rebase" or "squash" merge
+ * strategy, and we then fetch the new main branch from the server.
+ *
+ * This situation has not been observed yet - only missing commits have
+ * been noticed, not missing trees or blobs. (In fact, if it were
+ * believed that only missing commits are problematic, one could argue
+ * that trees should be excluded from the outgoing link check as well;
+ * but it is safer to include them.)
+ *
+ * Given the rarity of the situation, and because the "penalty" is
+ * merely to refetch the missing blob when it is needed (which happens
+ * only once - when refetched, the blob goes into a promisor pack, so it
+ * won't be GC-ed), the tradeoff seems worth it.
+ */
+static void maybe_record_name_entry(const struct name_entry *entry)
+{
+	if (!S_ISDIR(entry->mode))
+		return;
+
+	record_outgoing_link(&entry->oid);
+}
+
+/*
+ * Record every object that `obj` points at directly:
+ *
+ *  - for a tree, each entry accepted by maybe_record_name_entry();
+ *  - for a commit, its root tree and each of its parents;
+ *  - for a tag, the tagged object.
+ *
+ * Objects of any other type record nothing.
+ */
+static void do_record_outgoing_links(struct object *obj)
+{
+	switch (obj->type) {
+	case OBJ_TREE: {
+		struct tree *t = (struct tree *)obj;
+		struct tree_desc walker;
+		struct name_entry item;
+
+		/*
+		 * Error messages are given when packs are verified, so
+		 * a tree that cannot be parsed is silently skipped here.
+		 */
+		if (init_tree_desc_gently(&walker, &t->object.oid,
+					  t->buffer, t->size, 0))
+			break;
+
+		while (tree_entry_gently(&walker, &item))
+			maybe_record_name_entry(&item);
+		break;
+	}
+	case OBJ_COMMIT: {
+		struct commit *c = (struct commit *)obj;
+		struct commit_list *p = c->parents;
+
+		record_outgoing_link(get_commit_tree_oid(c));
+		while (p) {
+			record_outgoing_link(&p->item->object.oid);
+			p = p->next;
+		}
+		break;
+	}
+	case OBJ_TAG:
+		record_outgoing_link(get_tagged_oid((struct tag *)obj));
+		break;
+	default:
+		break;
+	}
+}
+
static void sha1_object(const void *data, struct object_entry *obj_entry,
unsigned long size, enum object_type type,
const struct object_id *oid)
@@ -845,7 +922,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
free(has_data);
}
- if (strict || do_fsck_object) {
+ if (strict || do_fsck_object || record_outgoing_links) {
read_lock();
if (type == OBJ_BLOB) {
struct blob *blob = lookup_blob(the_repository, oid);
@@ -877,6 +954,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
die(_("fsck error in packed object"));
if (strict && fsck_walk(obj, NULL, &fsck_options))
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
+ if (record_outgoing_links)
+ do_record_outgoing_links(obj);
if (obj->type == OBJ_TREE) {
struct tree *item = (struct tree *) obj;
@@ -1238,7 +1317,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,10 +1333,9 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
- work_lock();
for (i = 0; i < nr_threads; i++) {
int ret = pthread_create(&thread_data[i].thread, NULL,
threaded_second_pass, thread_data + i);
@@ -1265,7 +1343,6 @@ static void resolve_deltas(void)
die(_("unable to create thread: %s"),
strerror(ret));
}
- work_unlock();
for (i = 0; i < nr_threads; i++)
pthread_join(thread_data[i].thread, NULL);
cleanup_thread();
@@ -1479,7 +1556,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1505,9 +1582,9 @@ static void rename_tmp_packfile(const char **final_name,
struct strbuf *name, unsigned char *hash,
const char *ext, int make_read_only_if_same)
{
- if (*final_name != curr_name) {
+ if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
@@ -1552,7 +1629,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1603,6 +1681,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1650,7 +1732,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
@@ -1719,6 +1802,73 @@ static void show_pack_info(int stat_only)
free(chain_histogram);
}
+/*
+ * Repack the local objects referenced by the pack that was just indexed
+ * into promisor packs.
+ *
+ * Every ID in `outgoing_links` is looked up. IDs that are missing, or
+ * that already live in a promisor pack, are skipped. The first ID that
+ * needs repacking lazily starts a "git pack-objects" child; that ID and
+ * all later ones are fed to it, one hex ID per line. Each line the
+ * child prints back is taken as the hash of a pack it wrote, and an
+ * empty ".promisor" file is created for that pack.
+ *
+ * Does nothing if there are no outgoing links, or if none of them needs
+ * repacking. Dies if the child cannot be started, fed, or finished, or
+ * if it prints anything other than full hex object ID lines.
+ */
+static void repack_local_links(void)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	FILE *out;
+	struct strbuf line = STRBUF_INIT;
+	struct oidset_iter iter;
+	struct object_id *oid;
+	char *base_name = NULL;
+
+	if (!oidset_size(&outgoing_links))
+		return;
+
+	oidset_iter_init(&outgoing_links, &iter);
+	while ((oid = oidset_iter_next(&iter))) {
+		struct object_info info = OBJECT_INFO_INIT;
+		if (oid_object_info_extended(the_repository, oid, &info, 0))
+			/* Missing; assume it is a promisor object */
+			continue;
+		if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
+			continue;
+
+		/* Start pack-objects only once there is something to feed it. */
+		if (!cmd.args.nr) {
+			base_name = mkpathdup(
+				"%s/pack/pack",
+				repo_get_object_directory(the_repository));
+			strvec_push(&cmd.args, "pack-objects");
+			strvec_push(&cmd.args,
+				    "--exclude-promisor-objects-best-effort");
+			strvec_push(&cmd.args, base_name);
+			cmd.git_cmd = 1;
+			cmd.in = -1;
+			cmd.out = -1;
+			if (start_command(&cmd))
+				die(_("could not start pack-objects to repack local links"));
+		}
+
+		if (write_in_full(cmd.in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
+		    write_in_full(cmd.in, "\n", 1) < 0)
+			die(_("failed to feed local object to pack-objects"));
+	}
+
+	/* The child was never started: nothing needed repacking. */
+	if (!cmd.args.nr)
+		return;
+
+	close(cmd.in);
+
+	out = xfdopen(cmd.out, "r");
+	while (strbuf_getline_lf(&line, out) != EOF) {
+		unsigned char binary[GIT_MAX_RAWSZ];
+
+		/*
+		 * hex_to_bytes() takes the number of bytes to produce
+		 * (i.e. the number of hex digit pairs), not the length
+		 * of the hex string, and returns 0 on success.
+		 */
+		if (line.len != the_hash_algo->hexsz ||
+		    hex_to_bytes(binary, line.buf, the_hash_algo->rawsz))
+			die(_("index-pack: Expecting full hex object ID lines only from pack-objects."));
+
+		/*
+		 * pack-objects creates the .pack and .idx files, but not the
+		 * .promisor file. Create the .promisor file, which is empty.
+		 */
+		write_special_file("promisor", "", NULL, binary, NULL);
+	}
+
+	fclose(out);
+	if (finish_command(&cmd))
+		die(_("could not finish pack-objects to repack local links"));
+	strbuf_release(&line);
+	free(base_name);
+}
+
int cmd_index_pack(int argc,
const char **argv,
const char *prefix,
@@ -1726,7 +1876,7 @@ int cmd_index_pack(int argc,
{
int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index;
const char *curr_index;
- const char *curr_rev_index = NULL;
+ char *curr_rev_index = NULL;
const char *index_name = NULL, *pack_name = NULL, *rev_index_name = NULL;
const char *keep_msg = NULL;
const char *promisor_msg = NULL;
@@ -1794,7 +1944,7 @@ int cmd_index_pack(int argc,
} else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) {
; /* nothing to do */
} else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) {
- ; /* already parsed */
+ record_outgoing_links = 1;
} else if (starts_with(arg, "--threads=")) {
char *end;
nr_threads = strtoul(arg+10, &end, 0);
@@ -1865,6 +2015,8 @@ int cmd_index_pack(int argc,
usage(index_pack_usage);
if (fix_thin_pack && !from_stdin)
die(_("the option '%s' requires '%s'"), "--fix-thin", "--stdin");
+ if (promisor_msg && pack_name)
+ die(_("--promisor cannot be used with a pack name"));
if (from_stdin && !startup_info->have_repository)
die(_("--stdin requires a git repository"));
if (from_stdin && hash_algo)
@@ -1928,7 +2080,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
@@ -1968,8 +2120,9 @@ int cmd_index_pack(int argc,
free((void *) curr_pack);
if (!index_name)
free((void *) curr_index);
- if (!rev_index_name)
- free((void *) curr_rev_index);
+ free(curr_rev_index);
+
+ repack_local_links();
/*
* Let the caller know this pack is not self contained