summaryrefslogtreecommitdiff
path: root/builtin/index-pack.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin/index-pack.c')
-rw-r--r--builtin/index-pack.c274
1 files changed, 213 insertions, 61 deletions
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index e228c56ff2..de127c0ff1 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1,4 +1,6 @@
#define USE_THE_REPOSITORY_VARIABLE
+#define DISABLE_SIGN_COMPARE_WARNINGS
+
#include "builtin.h"
#include "config.h"
#include "delta.h"
@@ -9,6 +11,7 @@
#include "csum-file.h"
#include "blob.h"
#include "commit.h"
+#include "tag.h"
#include "tree.h"
#include "progress.h"
#include "fsck.h"
@@ -20,9 +23,14 @@
#include "object-file.h"
#include "object-store-ll.h"
#include "oid-array.h"
+#include "oidset.h"
+#include "path.h"
#include "replace-object.h"
+#include "tree-walk.h"
#include "promisor-remote.h"
+#include "run-command.h"
#include "setup.h"
+#include "strvec.h"
static const char index_pack_usage[] =
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--[no-]rev-index] [--verify] [--strict[=<msg-id>=<severity>...]] [--fsck-objects[=<msg-id>=<severity>...]] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
@@ -94,7 +102,7 @@ static LIST_HEAD(done_head);
static size_t base_cache_used;
static size_t base_cache_limit;
-struct thread_local {
+struct thread_local_data {
pthread_t thread;
int pack_fd;
};
@@ -117,7 +125,7 @@ static struct object_entry *objects;
static struct object_stat *obj_stat;
static struct ofs_delta_entry *ofs_deltas;
static struct ref_delta_entry *ref_deltas;
-static struct thread_local nothread_data;
+static struct thread_local_data nothread_data;
static int nr_objects;
static int nr_ofs_deltas;
static int nr_ref_deltas;
@@ -143,12 +151,19 @@ static unsigned int input_offset, input_len;
static off_t consumed_bytes;
static off_t max_input_size;
static unsigned deepest_delta;
-static git_hash_ctx input_ctx;
+static struct git_hash_ctx input_ctx;
static uint32_t input_crc32;
static int input_fd, output_fd;
static const char *curr_pack;
-static struct thread_local *thread_data;
+/*
+ * outgoing_links is guarded by read_mutex, and record_outgoing_links is
+ * read-only in a thread.
+ */
+static struct oidset outgoing_links = OIDSET_INIT;
+static int record_outgoing_links;
+
+static struct thread_local_data *thread_data;
static int nr_dispatched;
static int threads_active;
@@ -264,13 +279,14 @@ static unsigned check_objects(void)
{
unsigned i, max, foreign_nr = 0;
- max = get_max_object_index();
+ max = get_max_object_index(the_repository);
if (verbose)
- progress = start_delayed_progress(_("Checking objects"), max);
+ progress = start_delayed_progress(the_repository,
+ _("Checking objects"), max);
for (i = 0; i < max; i++) {
- foreign_nr += check_object(get_indexed_object(i));
+ foreign_nr += check_object(get_indexed_object(the_repository, i));
display_progress(progress, i + 1);
}
@@ -285,7 +301,7 @@ static void flush(void)
if (input_offset) {
if (output_fd >= 0)
write_or_die(output_fd, input_buffer, input_offset);
- the_hash_algo->update_fn(&input_ctx, input_buffer, input_offset);
+ git_hash_update(&input_ctx, input_buffer, input_offset);
memmove(input_buffer, input_buffer + input_offset, input_len);
input_offset = 0;
}
@@ -364,16 +380,18 @@ static const char *open_pack_file(const char *pack_name)
static void parse_pack_header(void)
{
- struct pack_header *hdr = fill(sizeof(struct pack_header));
+ unsigned char *hdr = fill(sizeof(struct pack_header));
/* Header consistency check */
- if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
+ if (get_be32(hdr) != PACK_SIGNATURE)
die(_("pack signature mismatch"));
- if (!pack_version_ok(hdr->hdr_version))
+ hdr += 4;
+ if (!pack_version_ok_native(get_be32(hdr)))
die(_("pack version %"PRIu32" unsupported"),
- ntohl(hdr->hdr_version));
+ get_be32(hdr));
+ hdr += 4;
- nr_objects = ntohl(hdr->hdr_entries);
+ nr_objects = get_be32(hdr);
use(sizeof(struct pack_header));
}
@@ -390,7 +408,7 @@ static NORETURN void bad_object(off_t offset, const char *format, ...)
(uintmax_t)offset, buf);
}
-static inline struct thread_local *get_thread_data(void)
+static inline struct thread_local_data *get_thread_data(void)
{
if (HAVE_THREADS) {
if (threads_active)
@@ -401,7 +419,7 @@ static inline struct thread_local *get_thread_data(void)
return &nothread_data;
}
-static void set_thread_data(struct thread_local *data)
+static void set_thread_data(struct thread_local_data *data)
{
if (threads_active)
pthread_setspecific(key, data);
@@ -457,17 +475,18 @@ static void *unpack_entry_data(off_t offset, unsigned long size,
int status;
git_zstream stream;
void *buf;
- git_hash_ctx c;
+ struct git_hash_ctx c;
char hdr[32];
int hdrlen;
if (!is_delta_type(type)) {
hdrlen = format_object_header(hdr, sizeof(hdr), type, size);
the_hash_algo->init_fn(&c);
- the_hash_algo->update_fn(&c, hdr, hdrlen);
+ git_hash_update(&c, hdr, hdrlen);
} else
oid = NULL;
- if (type == OBJ_BLOB && size > big_file_threshold)
+ if (type == OBJ_BLOB &&
+ size > repo_settings_get_big_file_threshold(the_repository))
buf = fixed_buf;
else
buf = xmallocz(size);
@@ -484,7 +503,7 @@ static void *unpack_entry_data(off_t offset, unsigned long size,
status = git_inflate(&stream, 0);
use(input_len - stream.avail_in);
if (oid)
- the_hash_algo->update_fn(&c, last_out, stream.next_out - last_out);
+ git_hash_update(&c, last_out, stream.next_out - last_out);
if (buf == fixed_buf) {
stream.next_out = buf;
stream.avail_out = sizeof(fixed_buf);
@@ -494,7 +513,7 @@ static void *unpack_entry_data(off_t offset, unsigned long size,
bad_object(offset, _("inflate returned %d"), status);
git_inflate_end(&stream);
if (oid)
- the_hash_algo->final_oid_fn(oid, &c);
+ git_hash_final_oid(oid, &c);
return buf == fixed_buf ? NULL : buf;
}
@@ -781,7 +800,8 @@ static int check_collison(struct object_entry *entry)
enum object_type type;
unsigned long size;
- if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB)
+ if (entry->size <= repo_settings_get_big_file_threshold(the_repository) ||
+ entry->type != OBJ_BLOB)
return -1;
memset(&data, 0, sizeof(data));
@@ -799,6 +819,68 @@ static int check_collison(struct object_entry *entry)
return 0;
}
+static void record_outgoing_link(const struct object_id *oid)
+{
+ oidset_insert(&outgoing_links, oid);
+}
+
+static void maybe_record_name_entry(const struct name_entry *entry)
+{
+ /*
+ * Checking only trees here results in a significantly faster packfile
+ * indexing, but the drawback is that if the packfile to be indexed
+ * references a local blob only directly (that is, never through a
+ * local tree), that local blob is in danger of being garbage
+ * collected. Such a situation may arise if we push local commits,
+ * including one with a change to a blob in the root tree, and then the
+ * server incorporates them into its main branch through a "rebase" or
+ * "squash" merge strategy, and then we fetch the new main branch from
+ * the server.
+ *
+ * This situation has not been observed yet - we have only noticed
+ * missing commits, not missing trees or blobs. (In fact, if it were
+ * believed that only missing commits are problematic, one could argue
+ * that we should also exclude trees during the outgoing link check;
+ * but it is safer to include them.)
+ *
+ * Due to the rarity of the situation (it has not been observed to
+ * happen in real life), and because the "penalty" in such a situation
+ * is merely to refetch the missing blob when it's needed (and this
+ * happens only once - when refetched, the blob goes into a promisor
+ * pack, so it won't be GC-ed, the tradeoff seems worth it.
+ */
+ if (S_ISDIR(entry->mode))
+ record_outgoing_link(&entry->oid);
+}
+
+static void do_record_outgoing_links(struct object *obj)
+{
+ if (obj->type == OBJ_TREE) {
+ struct tree *tree = (struct tree *)obj;
+ struct tree_desc desc;
+ struct name_entry entry;
+ if (init_tree_desc_gently(&desc, &tree->object.oid,
+ tree->buffer, tree->size, 0))
+ /*
+ * Error messages are given when packs are
+ * verified, so do not print any here.
+ */
+ return;
+ while (tree_entry_gently(&desc, &entry))
+ maybe_record_name_entry(&entry);
+ } else if (obj->type == OBJ_COMMIT) {
+ struct commit *commit = (struct commit *) obj;
+ struct commit_list *parents = commit->parents;
+
+ record_outgoing_link(get_commit_tree_oid(commit));
+ for (; parents; parents = parents->next)
+ record_outgoing_link(&parents->item->object.oid);
+ } else if (obj->type == OBJ_TAG) {
+ struct tag *tag = (struct tag *) obj;
+ record_outgoing_link(get_tagged_oid(tag));
+ }
+}
+
static void sha1_object(const void *data, struct object_entry *obj_entry,
unsigned long size, enum object_type type,
const struct object_id *oid)
@@ -845,7 +927,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
free(has_data);
}
- if (strict || do_fsck_object) {
+ if (strict || do_fsck_object || record_outgoing_links) {
read_lock();
if (type == OBJ_BLOB) {
struct blob *blob = lookup_blob(the_repository, oid);
@@ -877,6 +959,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
die(_("fsck error in packed object"));
if (strict && fsck_walk(obj, NULL, &fsck_options))
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
+ if (record_outgoing_links)
+ do_record_outgoing_links(obj);
if (obj->type == OBJ_TREE) {
struct tree *item = (struct tree *) obj;
@@ -1166,10 +1250,11 @@ static void parse_pack_objects(unsigned char *hash)
struct ofs_delta_entry *ofs_delta = ofs_deltas;
struct object_id ref_delta_oid;
struct stat st;
- git_hash_ctx tmp_ctx;
+ struct git_hash_ctx tmp_ctx;
if (verbose)
progress = start_progress(
+ the_repository,
progress_title ? progress_title :
from_stdin ? _("Receiving objects") : _("Indexing objects"),
nr_objects);
@@ -1204,8 +1289,8 @@ static void parse_pack_objects(unsigned char *hash)
/* Check pack integrity */
flush();
the_hash_algo->init_fn(&tmp_ctx);
- the_hash_algo->clone_fn(&tmp_ctx, &input_ctx);
- the_hash_algo->final_fn(hash, &tmp_ctx);
+ git_hash_clone(&tmp_ctx, &input_ctx);
+ git_hash_final(hash, &tmp_ctx);
if (!hasheq(fill(the_hash_algo->rawsz), hash, the_repository->hash_algo))
die(_("pack is corrupted (SHA1 mismatch)"));
use(the_hash_algo->rawsz);
@@ -1238,7 +1323,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1250,14 +1335,14 @@ static void resolve_deltas(void)
QSORT(ref_deltas, nr_ref_deltas, compare_ref_delta_entry);
if (verbose || show_resolving_progress)
- progress = start_progress(_("Resolving deltas"),
+ progress = start_progress(the_repository,
+ _("Resolving deltas"),
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
- work_lock();
for (i = 0; i < nr_threads; i++) {
int ret = pthread_create(&thread_data[i].thread, NULL,
threaded_second_pass, thread_data + i);
@@ -1265,7 +1350,6 @@ static void resolve_deltas(void)
die(_("unable to create thread: %s"),
strerror(ret));
}
- work_unlock();
for (i = 0; i < nr_threads; i++)
pthread_join(thread_data[i].thread, NULL);
cleanup_thread();
@@ -1300,7 +1384,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
REALLOC_ARRAY(objects, nr_objects + nr_unresolved + 1);
memset(objects + nr_objects + 1, 0,
nr_unresolved * sizeof(*objects));
- f = hashfd(output_fd, curr_pack);
+ f = hashfd(the_repository->hash_algo, output_fd, curr_pack);
fix_unresolved_deltas(f);
strbuf_addf(&msg, Q_("completed with %d local object",
"completed with %d local objects",
@@ -1310,7 +1394,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
strbuf_release(&msg);
finalize_hashfile(f, tail_hash, FSYNC_COMPONENT_PACK, 0);
hashcpy(read_hash, pack_hash, the_repository->hash_algo);
- fixup_pack_header_footer(output_fd, pack_hash,
+ fixup_pack_header_footer(the_hash_algo, output_fd, pack_hash,
curr_pack, nr_objects,
read_hash, consumed_bytes-the_hash_algo->rawsz);
if (!hasheq(read_hash, tail_hash, the_repository->hash_algo))
@@ -1479,7 +1563,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1505,9 +1589,9 @@ static void rename_tmp_packfile(const char **final_name,
struct strbuf *name, unsigned char *hash,
const char *ext, int make_read_only_if_same)
{
- if (*final_name != curr_name) {
+ if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
@@ -1552,7 +1636,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1603,6 +1688,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1650,7 +1739,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
@@ -1719,6 +1809,73 @@ static void show_pack_info(int stat_only)
free(chain_histogram);
}
+static void repack_local_links(void)
+{
+ struct child_process cmd = CHILD_PROCESS_INIT;
+ FILE *out;
+ struct strbuf line = STRBUF_INIT;
+ struct oidset_iter iter;
+ struct object_id *oid;
+ char *base_name = NULL;
+
+ if (!oidset_size(&outgoing_links))
+ return;
+
+ oidset_iter_init(&outgoing_links, &iter);
+ while ((oid = oidset_iter_next(&iter))) {
+ struct object_info info = OBJECT_INFO_INIT;
+ if (oid_object_info_extended(the_repository, oid, &info, 0))
+ /* Missing; assume it is a promisor object */
+ continue;
+ if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
+ continue;
+
+ if (!cmd.args.nr) {
+ base_name = mkpathdup(
+ "%s/pack/pack",
+ repo_get_object_directory(the_repository));
+ strvec_push(&cmd.args, "pack-objects");
+ strvec_push(&cmd.args,
+ "--exclude-promisor-objects-best-effort");
+ strvec_push(&cmd.args, base_name);
+ cmd.git_cmd = 1;
+ cmd.in = -1;
+ cmd.out = -1;
+ if (start_command(&cmd))
+ die(_("could not start pack-objects to repack local links"));
+ }
+
+ if (write_in_full(cmd.in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
+ write_in_full(cmd.in, "\n", 1) < 0)
+ die(_("failed to feed local object to pack-objects"));
+ }
+
+ if (!cmd.args.nr)
+ return;
+
+ close(cmd.in);
+
+ out = xfdopen(cmd.out, "r");
+ while (strbuf_getline_lf(&line, out) != EOF) {
+ unsigned char binary[GIT_MAX_RAWSZ];
+ if (line.len != the_hash_algo->hexsz ||
+ !hex_to_bytes(binary, line.buf, line.len))
+ die(_("index-pack: Expecting full hex object ID lines only from pack-objects."));
+
+ /*
+ * pack-objects creates the .pack and .idx files, but not the
+ * .promisor file. Create the .promisor file, which is empty.
+ */
+ write_special_file("promisor", "", NULL, binary, NULL);
+ }
+
+ fclose(out);
+ if (finish_command(&cmd))
+ die(_("could not finish pack-objects to repack local links"));
+ strbuf_release(&line);
+ free(base_name);
+}
+
int cmd_index_pack(int argc,
const char **argv,
const char *prefix,
@@ -1726,7 +1883,7 @@ int cmd_index_pack(int argc,
{
int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index;
const char *curr_index;
- const char *curr_rev_index = NULL;
+ char *curr_rev_index = NULL;
const char *index_name = NULL, *pack_name = NULL, *rev_index_name = NULL;
const char *keep_msg = NULL;
const char *promisor_msg = NULL;
@@ -1747,8 +1904,7 @@ int cmd_index_pack(int argc,
*/
fetch_if_missing = 0;
- if (argc == 2 && !strcmp(argv[1], "-h"))
- usage(index_pack_usage);
+ show_usage_if_asked(argc, argv, index_pack_usage);
disable_replace_refs();
fsck_options.walk = mark_link;
@@ -1794,7 +1950,7 @@ int cmd_index_pack(int argc,
} else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) {
; /* nothing to do */
} else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) {
- ; /* already parsed */
+ record_outgoing_links = 1;
} else if (starts_with(arg, "--threads=")) {
char *end;
nr_threads = strtoul(arg+10, &end, 0);
@@ -1804,19 +1960,11 @@ int cmd_index_pack(int argc,
warning(_("no threads support, ignoring %s"), arg);
nr_threads = 1;
}
- } else if (starts_with(arg, "--pack_header=")) {
- struct pack_header *hdr;
- char *c;
-
- hdr = (struct pack_header *)input_buffer;
- hdr->hdr_signature = htonl(PACK_SIGNATURE);
- hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
- if (*c != ',')
- die(_("bad %s"), arg);
- hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
- if (*c)
- die(_("bad %s"), arg);
- input_len = sizeof(*hdr);
+ } else if (skip_prefix(arg, "--pack_header=", &arg)) {
+ if (parse_pack_header_option(arg,
+ input_buffer,
+ &input_len) < 0)
+ die(_("bad --pack_header: %s"), arg);
} else if (!strcmp(arg, "-v")) {
verbose = 1;
} else if (!strcmp(arg, "--progress-title")) {
@@ -1865,6 +2013,8 @@ int cmd_index_pack(int argc,
usage(index_pack_usage);
if (fix_thin_pack && !from_stdin)
die(_("the option '%s' requires '%s'"), "--fix-thin", "--stdin");
+ if (promisor_msg && pack_name)
+ die(_("--promisor cannot be used with a pack name"));
if (from_stdin && !startup_info->have_repository)
die(_("--stdin requires a git repository"));
if (from_stdin && hash_algo)
@@ -1928,7 +2078,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
@@ -1941,11 +2091,12 @@ int cmd_index_pack(int argc,
ALLOC_ARRAY(idx_objects, nr_objects);
for (i = 0; i < nr_objects; i++)
idx_objects[i] = &objects[i].idx;
- curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_hash);
+ curr_index = write_idx_file(the_repository, index_name, idx_objects,
+ nr_objects, &opts, pack_hash);
if (rev_index)
- curr_rev_index = write_rev_file(rev_index_name, idx_objects,
- nr_objects, pack_hash,
- opts.flags);
+ curr_rev_index = write_rev_file(the_repository, rev_index_name,
+ idx_objects, nr_objects,
+ pack_hash, opts.flags);
free(idx_objects);
if (!verify)
@@ -1968,8 +2119,9 @@ int cmd_index_pack(int argc,
free((void *) curr_pack);
if (!index_name)
free((void *) curr_index);
- if (!rev_index_name)
- free((void *) curr_rev_index);
+ free(curr_rev_index);
+
+ repack_local_links();
/*
* Let the caller know this pack is not self contained