summaryrefslogtreecommitdiff
path: root/builtin/pack-objects.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin/pack-objects.c')
-rw-r--r--builtin/pack-objects.c351
1 files changed, 320 insertions, 31 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 014dcd4bc9..39e28cfcaf 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -36,6 +36,7 @@
#include "trace2.h"
#include "shallow.h"
#include "promisor-remote.h"
+#include "pack-mtimes.h"
/*
* Objects we are going to pack are collected in the `to_pack` structure.
@@ -194,6 +195,8 @@ static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
+static int cruft;
+static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
static int incremental;
@@ -1260,9 +1263,13 @@ static void write_pack_file(void)
&to_pack, written_list, nr_written);
}
+ if (cruft)
+ pack_idx_opts.flags |= WRITE_MTIMES;
+
stage_tmp_packfiles(&tmpname, pack_tmp_name,
written_list, nr_written,
- &pack_idx_opts, hash, &idx_tmp_name);
+ &to_pack, &pack_idx_opts, hash,
+ &idx_tmp_name);
if (write_bitmap_index) {
size_t tmpname_len = tmpname.len;
@@ -1357,6 +1364,9 @@ static int want_found_object(const struct object_id *oid, int exclude,
if (incremental)
return 0;
+ if (!is_pack_valid(p))
+ return -1;
+
/*
* When asked to do --local (do not include an object that appears in a
* pack we borrow from elsewhere) or --honor-pack-keep (do not include
@@ -1472,6 +1482,9 @@ static int want_object_in_pack(const struct object_id *oid,
want = want_found_object(oid, exclude, *found_pack);
if (want != -1)
return want;
+
+ *found_pack = NULL;
+ *found_offset = 0;
}
for (m = get_multi_pack_index(the_repository); m; m = m->next) {
@@ -1515,13 +1528,13 @@ static int want_object_in_pack(const struct object_id *oid,
return 1;
}
-static void create_object_entry(const struct object_id *oid,
- enum object_type type,
- uint32_t hash,
- int exclude,
- int no_try_delta,
- struct packed_git *found_pack,
- off_t found_offset)
+static struct object_entry *create_object_entry(const struct object_id *oid,
+ enum object_type type,
+ uint32_t hash,
+ int exclude,
+ int no_try_delta,
+ struct packed_git *found_pack,
+ off_t found_offset)
{
struct object_entry *entry;
@@ -1538,6 +1551,8 @@ static void create_object_entry(const struct object_id *oid,
}
entry->no_try_delta = no_try_delta;
+
+ return entry;
}
static const char no_closure_warning[] = N_(
@@ -3155,7 +3170,7 @@ static int git_pack_config(const char *k, const char *v, void *cb)
if (!strcmp(k, "pack.indexversion")) {
pack_idx_opts.version = git_config_int(k, v);
if (pack_idx_opts.version > 2)
- die(_("bad pack.indexversion=%"PRIu32),
+ die(_("bad pack.indexVersion=%"PRIu32),
pack_idx_opts.version);
return 0;
}
@@ -3201,10 +3216,8 @@ static int add_object_entry_from_pack(const struct object_id *oid,
uint32_t pos,
void *_data)
{
- struct rev_info *revs = _data;
- struct object_info oi = OBJECT_INFO_INIT;
off_t ofs;
- enum object_type type;
+ enum object_type type = OBJ_NONE;
display_progress(progress_state, ++nr_seen);
@@ -3215,19 +3228,24 @@ static int add_object_entry_from_pack(const struct object_id *oid,
if (!want_object_in_pack(oid, 0, &p, &ofs))
return 0;
- oi.typep = &type;
- if (packed_object_info(the_repository, p, ofs, &oi) < 0)
- die(_("could not get type of object %s in pack %s"),
- oid_to_hex(oid), p->pack_name);
- else if (type == OBJ_COMMIT) {
- /*
- * commits in included packs are used as starting points for the
- * subsequent revision walk
- */
- add_pending_oid(revs, NULL, oid, 0);
- }
+ if (p) {
+ struct rev_info *revs = _data;
+ struct object_info oi = OBJECT_INFO_INIT;
- stdin_packs_found_nr++;
+ oi.typep = &type;
+ if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
+ die(_("could not get type of object %s in pack %s"),
+ oid_to_hex(oid), p->pack_name);
+ } else if (type == OBJ_COMMIT) {
+ /*
+ * commits in included packs are used as starting points for the
+ * subsequent revision walk
+ */
+ add_pending_oid(revs, NULL, oid, 0);
+ }
+
+ stdin_packs_found_nr++;
+ }
create_object_entry(oid, type, 0, 0, 0, p, ofs);
@@ -3346,6 +3364,8 @@ static void read_packs_list_from_stdin(void)
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
+ if (!is_pack_valid(p))
+ die(_("packfile %s cannot be accessed"), p->pack_name);
}
/*
@@ -3369,8 +3389,6 @@ static void read_packs_list_from_stdin(void)
for_each_string_list_item(item, &include_packs) {
struct packed_git *p = item->util;
- if (!p)
- die(_("could not find pack '%s'"), item->string);
for_each_object_in_pack(p,
add_object_entry_from_pack,
&revs,
@@ -3394,6 +3412,217 @@ static void read_packs_list_from_stdin(void)
string_list_clear(&exclude_packs, 0);
}
+static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
+ struct packed_git *pack, off_t offset,
+ const char *name, uint32_t mtime)
+{
+ struct object_entry *entry;
+
+ display_progress(progress_state, ++nr_seen);
+
+ entry = packlist_find(&to_pack, oid);
+ if (entry) {
+ if (name) {
+ entry->hash = pack_name_hash(name);
+ entry->no_try_delta = no_try_delta(name);
+ }
+ } else {
+ if (!want_object_in_pack(oid, 0, &pack, &offset))
+ return;
+ if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
+ /*
+ * If a traversed tree has a missing blob then we want
+ * to avoid adding that missing object to our pack.
+ *
+ * This only applies to missing blobs, not trees,
+ * because the traversal needs to parse sub-trees but
+ * not blobs.
+ *
+ * Note we only perform this check when we couldn't
+ * already find the object in a pack, so we're really
+ * limited to "ensure non-tip blobs which don't exist in
+ * packs do exist via loose objects". Confused?
+ */
+ return;
+ }
+
+ entry = create_object_entry(oid, type, pack_name_hash(name),
+ 0, name && no_try_delta(name),
+ pack, offset);
+ }
+
+ if (mtime > oe_cruft_mtime(&to_pack, entry))
+ oe_set_cruft_mtime(&to_pack, entry, mtime);
+ return;
+}
+
+static void show_cruft_object(struct object *obj, const char *name, void *data)
+{
+ /*
+ * if we did not record it earlier, it's at least as old as our
+ * expiration value. Rather than find it exactly, just use that
+ * value. This may bump it forward from its real mtime, but it
+ * will still be "too old" next time we run with the same
+ * expiration.
+ *
+ * if obj does appear in the packing list, this call is a noop (or may
+ * set the namehash).
+ */
+ add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration);
+}
+
+static void show_cruft_commit(struct commit *commit, void *data)
+{
+ show_cruft_object((struct object*)commit, NULL, data);
+}
+
+static int cruft_include_check_obj(struct object *obj, void *data)
+{
+ return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+}
+
+static int cruft_include_check(struct commit *commit, void *data)
+{
+ return cruft_include_check_obj((struct object*)commit, data);
+}
+
+static void set_cruft_mtime(const struct object *object,
+ struct packed_git *pack,
+ off_t offset, time_t mtime)
+{
+ add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL,
+ mtime);
+}
+
+static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
+{
+ struct string_list_item *item = NULL;
+ for_each_string_list_item(item, packs) {
+ struct packed_git *p = item->util;
+ if (!p)
+ die(_("could not find pack '%s'"), item->string);
+ p->pack_keep_in_core = keep;
+ }
+}
+
+static void add_unreachable_loose_objects(void);
+static void add_objects_in_unpacked_packs(void);
+
+static void enumerate_cruft_objects(void)
+{
+ if (progress)
+ progress_state = start_progress(_("Enumerating cruft objects"), 0);
+
+ add_objects_in_unpacked_packs();
+ add_unreachable_loose_objects();
+
+ stop_progress(&progress_state);
+}
+
+static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
+{
+ struct packed_git *p;
+ struct rev_info revs;
+ int ret;
+
+ repo_init_revisions(the_repository, &revs, NULL);
+
+ revs.tag_objects = 1;
+ revs.tree_objects = 1;
+ revs.blob_objects = 1;
+
+ revs.include_check = cruft_include_check;
+ revs.include_check_obj = cruft_include_check_obj;
+
+ revs.ignore_missing_links = 1;
+
+ if (progress)
+ progress_state = start_progress(_("Enumerating cruft objects"), 0);
+ ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
+ set_cruft_mtime, 1);
+ stop_progress(&progress_state);
+
+ if (ret)
+ die(_("unable to add cruft objects"));
+
+ /*
+ * Re-mark only the fresh packs as kept so that objects in
+ * unknown packs do not halt the reachability traversal early.
+ */
+ for (p = get_all_packs(the_repository); p; p = p->next)
+ p->pack_keep_in_core = 0;
+ mark_pack_kept_in_core(fresh_packs, 1);
+
+ if (prepare_revision_walk(&revs))
+ die(_("revision walk setup failed"));
+ if (progress)
+ progress_state = start_progress(_("Traversing cruft objects"), 0);
+ nr_seen = 0;
+ traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);
+
+ stop_progress(&progress_state);
+}
+
+static void read_cruft_objects(void)
+{
+ struct strbuf buf = STRBUF_INIT;
+ struct string_list discard_packs = STRING_LIST_INIT_DUP;
+ struct string_list fresh_packs = STRING_LIST_INIT_DUP;
+ struct packed_git *p;
+
+ ignore_packed_keep_in_core = 1;
+
+ while (strbuf_getline(&buf, stdin) != EOF) {
+ if (!buf.len)
+ continue;
+
+ if (*buf.buf == '-')
+ string_list_append(&discard_packs, buf.buf + 1);
+ else
+ string_list_append(&fresh_packs, buf.buf);
+ strbuf_reset(&buf);
+ }
+
+ string_list_sort(&discard_packs);
+ string_list_sort(&fresh_packs);
+
+ for (p = get_all_packs(the_repository); p; p = p->next) {
+ const char *pack_name = pack_basename(p);
+ struct string_list_item *item;
+
+ item = string_list_lookup(&fresh_packs, pack_name);
+ if (!item)
+ item = string_list_lookup(&discard_packs, pack_name);
+
+ if (item) {
+ item->util = p;
+ } else {
+ /*
+ * This pack wasn't mentioned in either the "fresh" or
+ * "discard" list, so the caller didn't know about it.
+ *
+ * Mark it as kept so that its objects are ignored by
+ * add_unseen_recent_objects_to_traversal(). We'll
+ * unmark it before starting the traversal so it doesn't
+ * halt the traversal early.
+ */
+ p->pack_keep_in_core = 1;
+ }
+ }
+
+ mark_pack_kept_in_core(&fresh_packs, 1);
+ mark_pack_kept_in_core(&discard_packs, 0);
+
+ if (cruft_expiration)
+ enumerate_and_traverse_cruft_objects(&fresh_packs);
+ else
+ enumerate_cruft_objects();
+
+ strbuf_release(&buf);
+ string_list_clear(&discard_packs, 0);
+ string_list_clear(&fresh_packs, 0);
+}
+
static void read_object_list_from_stdin(void)
{
char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
@@ -3526,7 +3755,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
uint32_t pos,
void *_data)
{
- add_object_entry(oid, OBJ_NONE, "", 0);
+ if (cruft) {
+ off_t offset;
+ time_t mtime;
+
+ if (pack->is_cruft) {
+ if (load_pack_mtimes(pack) < 0)
+ die(_("could not load cruft pack .mtimes"));
+ mtime = nth_packed_mtime(pack, pos);
+ } else {
+ mtime = pack->mtime;
+ }
+ offset = nth_packed_object_offset(pack, pos);
+
+ add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
+ NULL, mtime);
+ } else {
+ add_object_entry(oid, OBJ_NONE, "", 0);
+ }
return 0;
}
@@ -3550,7 +3796,19 @@ static int add_loose_object(const struct object_id *oid, const char *path,
return 0;
}
- add_object_entry(oid, type, "", 0);
+ if (cruft) {
+ struct stat st;
+ if (stat(path, &st) < 0) {
+ if (errno == ENOENT)
+ return 0;
+ return error_errno("unable to stat %s", oid_to_hex(oid));
+ }
+
+ add_cruft_object_entry(oid, type, NULL, 0, NULL,
+ st.st_mtime);
+ } else {
+ add_object_entry(oid, type, "", 0);
+ }
return 0;
}
@@ -3790,7 +4048,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
if (unpack_unreachable_expiration) {
revs->ignore_missing_links = 1;
if (add_unseen_recent_objects_to_traversal(revs,
- unpack_unreachable_expiration))
+ unpack_unreachable_expiration, NULL, 0))
die(_("unable to add recent objects"));
if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
@@ -3867,6 +4125,20 @@ static int option_parse_unpack_unreachable(const struct option *opt,
return 0;
}
+static int option_parse_cruft_expiration(const struct option *opt,
+ const char *arg, int unset)
+{
+ if (unset) {
+ cruft = 0;
+ cruft_expiration = 0;
+ } else {
+ cruft = 1;
+ if (arg)
+ cruft_expiration = approxidate(arg);
+ }
+ return 0;
+}
+
struct po_filter_data {
unsigned have_revs:1;
struct rev_info revs;
@@ -3956,6 +4228,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
N_("unpack unreachable objects newer than <time>"),
PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
+ OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")),
+ OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"),
+ N_("expire cruft objects older than <time>"),
+ PARSE_OPT_OPTARG, option_parse_cruft_expiration),
OPT_BOOL(0, "sparse", &sparse,
N_("use the sparse reachability algorithm")),
OPT_BOOL(0, "thin", &thin,
@@ -4082,7 +4358,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!HAVE_THREADS && delta_search_threads != 1)
warning(_("no threads support, ignoring --threads"));
- if (!pack_to_stdout && !pack_size_limit)
+ if (!pack_to_stdout && !pack_size_limit && !cruft)
pack_size_limit = pack_size_limit_cfg;
if (pack_to_stdout && pack_size_limit)
die(_("--max-pack-size cannot be used to build a pack for transfer"));
@@ -4109,6 +4385,15 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (stdin_packs && use_internal_rev_list)
die(_("cannot use internal rev list with --stdin-packs"));
+ if (cruft) {
+ if (use_internal_rev_list)
+ die(_("cannot use internal rev list with --cruft"));
+ if (stdin_packs)
+ die(_("cannot use --stdin-packs with --cruft"));
+ if (pack_size_limit)
+ die(_("cannot use --max-pack-size with --cruft"));
+ }
+
/*
* "soft" reasons not to use bitmaps - for on-disk repack by default we want
*
@@ -4165,7 +4450,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
the_repository);
prepare_packing_data(the_repository, &to_pack);
- if (progress)
+ if (progress && !cruft)
progress_state = start_progress(_("Enumerating objects"), 0);
if (stdin_packs) {
/* avoids adding objects in excluded packs */
@@ -4173,15 +4458,19 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
read_packs_list_from_stdin();
if (rev_list_unpacked)
add_unreachable_loose_objects();
+ } else if (cruft) {
+ read_cruft_objects();
} else if (!use_internal_rev_list) {
read_object_list_from_stdin();
} else if (pfd.have_revs) {
get_object_list(&pfd.revs, rp.nr, rp.v);
+ release_revisions(&pfd.revs);
} else {
struct rev_info revs;
repo_init_revisions(the_repository, &revs, NULL);
get_object_list(&revs, rp.nr, rp.v);
+ release_revisions(&revs);
}
cleanup_preferred_base();
if (include_tag && nr_result)