summaryrefslogtreecommitdiff
path: root/builtin/repack.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin/repack.c')
-rw-r--r--builtin/repack.c209
1 files changed, 181 insertions, 28 deletions
diff --git a/builtin/repack.c b/builtin/repack.c
index 59214dbdfd..a4def39197 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -17,7 +17,7 @@
#include "midx.h"
#include "packfile.h"
#include "prune-packed.h"
-#include "object-store.h"
+#include "odb.h"
#include "promisor-remote.h"
#include "shallow.h"
#include "pack.h"
@@ -39,11 +39,12 @@ static int write_bitmaps = -1;
static int use_delta_islands;
static int run_update_server_info = 1;
static char *packdir, *packtmp_name, *packtmp;
+static int midx_must_contain_cruft = 1;
static const char *const git_repack_usage[] = {
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
"[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n"
- "[--write-midx] [--name-hash-version=<n>]"),
+ "[--write-midx] [--name-hash-version=<n>] [--path-walk]"),
NULL
};
@@ -63,6 +64,7 @@ struct pack_objects_args {
int quiet;
int local;
int name_hash_version;
+ int path_walk;
struct list_objects_filter_options filter_options;
};
@@ -107,6 +109,10 @@ static int repack_config(const char *var, const char *value,
free(cruft_po_args->threads);
return git_config_string(&cruft_po_args->threads, var, value);
}
+ if (!strcmp(var, "repack.midxmustcontaincruft")) {
+ midx_must_contain_cruft = git_config_bool(var, value);
+ return 0;
+ }
return git_default_config(var, value, ctx, cb);
}
@@ -217,9 +223,9 @@ static void mark_packs_for_deletion(struct existing_packs *existing,
static void remove_redundant_pack(const char *dir_name, const char *base_name)
{
struct strbuf buf = STRBUF_INIT;
- struct multi_pack_index *m = get_local_multi_pack_index(the_repository);
+ struct multi_pack_index *m = get_multi_pack_index(the_repository->objects->sources);
strbuf_addf(&buf, "%s.pack", base_name);
- if (m && midx_contains_pack(m, buf.buf))
+ if (m && m->local && midx_contains_pack(m, buf.buf))
clear_midx_file(the_repository);
strbuf_insertf(&buf, 0, "%s/", dir_name);
unlink_pack_path(buf.buf, 1);
@@ -313,6 +319,8 @@ static void prepare_pack_objects(struct child_process *cmd,
strvec_pushf(&cmd->args, "--no-reuse-object");
if (args->name_hash_version)
strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version);
+ if (args->path_walk)
+ strvec_pushf(&cmd->args, "--path-walk");
if (args->local)
strvec_push(&cmd->args, "--local");
if (args->quiet)
@@ -687,6 +695,77 @@ static void free_pack_geometry(struct pack_geometry *geometry)
free(geometry->pack);
}
+static int midx_has_unknown_packs(char **midx_pack_names,
+ size_t midx_pack_names_nr,
+ struct string_list *include,
+ struct pack_geometry *geometry,
+ struct existing_packs *existing)
+{
+ size_t i;
+
+ string_list_sort(include);
+
+ for (i = 0; i < midx_pack_names_nr; i++) {
+ const char *pack_name = midx_pack_names[i];
+
+ /*
+ * Determine whether or not each MIDX'd pack from the existing
+ * MIDX (if any) is represented in the new MIDX. For each pack
+ * in the MIDX, it must either be:
+ *
+ * - In the "include" list of packs to be included in the new
+ * MIDX. Note this function is called before the include
+ * list is populated with any cruft pack(s).
+ *
+ * - Below the geometric split line (if using pack geometry),
+ * indicating that the pack won't be included in the new
+ * MIDX, but its contents were rolled up as part of the
+ * geometric repack.
+ *
+ * - In the existing non-kept packs list (if not using pack
+ * geometry), and marked as non-deleted.
+ */
+ if (string_list_has_string(include, pack_name)) {
+ continue;
+ } else if (geometry) {
+ struct strbuf buf = STRBUF_INIT;
+ uint32_t j;
+
+ for (j = 0; j < geometry->split; j++) {
+ strbuf_reset(&buf);
+ strbuf_addstr(&buf, pack_basename(geometry->pack[j]));
+ strbuf_strip_suffix(&buf, ".pack");
+ strbuf_addstr(&buf, ".idx");
+
+ if (!strcmp(pack_name, buf.buf)) {
+ strbuf_release(&buf);
+ break;
+ }
+ }
+
+ strbuf_release(&buf);
+
+ if (j < geometry->split)
+ continue;
+ } else {
+ struct string_list_item *item;
+
+ item = string_list_lookup(&existing->non_kept_packs,
+ pack_name);
+ if (item && !pack_is_marked_for_deletion(item))
+ continue;
+ }
+
+ /*
+ * If we got to this point, the MIDX includes some pack that we
+ * don't know about.
+ */
+ return 1;
+ }
+
+ return 0;
+}
+
struct midx_snapshot_ref_data {
struct tempfile *f;
struct oidset seen;
@@ -707,7 +786,7 @@ static int midx_snapshot_ref_one(const char *refname UNUSED,
if (oidset_insert(&data->seen, oid))
return 0; /* already seen */
- if (oid_object_info(the_repository, oid, NULL) != OBJ_COMMIT)
+ if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT)
return 0;
fprintf(data->f->fp, "%s%s\n", data->preferred ? "+" : "",
@@ -755,6 +834,8 @@ static void midx_snapshot_refs(struct tempfile *f)
static void midx_included_packs(struct string_list *include,
struct existing_packs *existing,
+ char **midx_pack_names,
+ size_t midx_pack_names_nr,
struct string_list *names,
struct pack_geometry *geometry)
{
@@ -808,26 +889,56 @@ static void midx_included_packs(struct string_list *include,
}
}
- for_each_string_list_item(item, &existing->cruft_packs) {
+ if (midx_must_contain_cruft ||
+ midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr,
+ include, geometry, existing)) {
/*
- * When doing a --geometric repack, there is no need to check
- * for deleted packs, since we're by definition not doing an
- * ALL_INTO_ONE repack (hence no packs will be deleted).
- * Otherwise we must check for and exclude any packs which are
- * enqueued for deletion.
+ * If there are one or more unknown pack(s) present (see
+ * midx_has_unknown_packs() for what makes a pack
+ * "unknown") in the MIDX before the repack, keep them
+ * as they may be required to form a reachability
+ * closure if the MIDX is bitmapped.
*
- * So we could omit the conditional below in the --geometric
- * case, but doing so is unnecessary since no packs are marked
- * as pending deletion (since we only call
- * `mark_packs_for_deletion()` when doing an all-into-one
- * repack).
+ * For example, a cruft pack can be required to form a
+ * reachability closure if the MIDX is bitmapped and one
+ * or more of the bitmap's selected commits reaches a
+ * once-cruft object that was later made reachable.
*/
- if (pack_is_marked_for_deletion(item))
- continue;
+ for_each_string_list_item(item, &existing->cruft_packs) {
+ /*
+ * When doing a --geometric repack, there is no
+ * need to check for deleted packs, since we're
+ * by definition not doing an ALL_INTO_ONE
+ * repack (hence no packs will be deleted).
+ * Otherwise we must check for and exclude any
+ * packs which are enqueued for deletion.
+ *
+ * So we could omit the conditional below in the
+ * --geometric case, but doing so is unnecessary
+ * since no packs are marked as pending
+ * deletion (since we only call
+ * `mark_packs_for_deletion()` when doing an
+ * all-into-one repack).
+ */
+ if (pack_is_marked_for_deletion(item))
+ continue;
- strbuf_reset(&buf);
- strbuf_addf(&buf, "%s.idx", item->string);
- string_list_insert(include, buf.buf);
+ strbuf_reset(&buf);
+ strbuf_addf(&buf, "%s.idx", item->string);
+ string_list_insert(include, buf.buf);
+ }
+ } else {
+ /*
+ * Modern versions of Git (with the appropriate
+ * configuration setting) will write new copies of
+ * once-cruft objects when doing a --geometric repack.
+ *
+ * If the MIDX has no cruft pack, new packs written
+ * during a --geometric repack will not rely on the
+ * cruft pack to form a reachability closure, so we can
+ * avoid including them in the MIDX in that case.
+ */
+ ;
}
strbuf_release(&buf);
@@ -1142,6 +1253,8 @@ int cmd_repack(int argc,
struct tempfile *refs_snapshot = NULL;
int i, ext, ret;
int show_progress;
+ char **midx_pack_names = NULL;
+ size_t midx_pack_names_nr = 0;
/* variables to be filled by option parsing */
int delete_redundant = 0;
@@ -1184,6 +1297,8 @@ int cmd_repack(int argc,
N_("pass --no-reuse-object to git-pack-objects")),
OPT_INTEGER(0, "name-hash-version", &po_args.name_hash_version,
N_("specify the name hash version to use for grouping similar objects by path")),
+ OPT_BOOL(0, "path-walk", &po_args.path_walk,
+ N_("pass --path-walk to git-pack-objects")),
OPT_NEGBIT('n', NULL, &run_update_server_info,
N_("do not run git-update-server-info"), 1),
OPT__QUIET(&po_args.quiet, N_("be quiet")),
@@ -1225,7 +1340,7 @@ int cmd_repack(int argc,
list_objects_filter_init(&po_args.filter_options);
- git_config(repack_config, &cruft_po_args);
+ repo_config(the_repository, repack_config, &cruft_po_args);
argc = parse_options(argc, argv, prefix, builtin_repack_options,
git_repack_usage, 0);
@@ -1235,7 +1350,7 @@ int cmd_repack(int argc,
po_args.depth = xstrdup_or_null(opt_depth);
po_args.threads = xstrdup_or_null(opt_threads);
- if (delete_redundant && repository_format_precious_objects)
+ if (delete_redundant && the_repository->repository_format_precious_objects)
die(_("cannot delete packs in a precious-objects repo"));
die_for_incompatible_opt3(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE), "-A",
@@ -1256,7 +1371,8 @@ int cmd_repack(int argc,
if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx)
die(_(incremental_bitmap_conflict_error));
- if (write_bitmaps && po_args.local && has_alt_odb(the_repository)) {
+ if (write_bitmaps && po_args.local &&
+ odb_has_alternates(the_repository->objects)) {
/*
* When asked to do a local repack, but we have
* packfiles that are inherited from an alternate, then
@@ -1356,7 +1472,10 @@ int cmd_repack(int argc,
!(pack_everything & PACK_CRUFT))
strvec_push(&cmd.args, "--pack-loose-unreachable");
} else if (geometry.split_factor) {
- strvec_push(&cmd.args, "--stdin-packs");
+ if (midx_must_contain_cruft)
+ strvec_push(&cmd.args, "--stdin-packs");
+ else
+ strvec_push(&cmd.args, "--stdin-packs=follow");
strvec_push(&cmd.args, "--unpacked");
} else {
strvec_push(&cmd.args, "--unpacked");
@@ -1396,8 +1515,25 @@ int cmd_repack(int argc,
if (ret)
goto cleanup;
- if (!names.nr && !po_args.quiet)
- printf_ln(_("Nothing new to pack."));
+ if (!names.nr) {
+ if (!po_args.quiet)
+ printf_ln(_("Nothing new to pack."));
+ /*
+ * If we didn't write any new packs, the non-cruft packs
+ * may refer to once-unreachable objects in the cruft
+ * pack(s).
+ *
+ * If there isn't already a MIDX, the one we write
+ * must include the cruft pack(s), in case the
+ * non-cruft pack(s) refer to once-cruft objects.
+ *
+ * If there is already a MIDX, we can punt here, since
+ * midx_has_unknown_packs() will make the decision for
+ * us.
+ */
+ if (!get_multi_pack_index(the_repository->objects->sources))
+ midx_must_contain_cruft = 1;
+ }
if (pack_everything & PACK_CRUFT) {
const char *pack_prefix = find_pack_prefix(packdir, packtmp);
@@ -1478,6 +1614,19 @@ int cmd_repack(int argc,
string_list_sort(&names);
+ if (get_multi_pack_index(the_repository->objects->sources)) {
+ struct multi_pack_index *m =
+ get_multi_pack_index(the_repository->objects->sources);
+
+ ALLOC_ARRAY(midx_pack_names,
+ m->num_packs + m->num_packs_in_base);
+
+ for (; m; m = m->base_midx)
+ for (uint32_t i = 0; i < m->num_packs; i++)
+ midx_pack_names[midx_pack_names_nr++] =
+ xstrdup(m->pack_names[i]);
+ }
+
close_object_store(the_repository->objects);
/*
@@ -1519,7 +1668,8 @@ int cmd_repack(int argc,
if (write_midx) {
struct string_list include = STRING_LIST_INIT_DUP;
- midx_included_packs(&include, &existing, &names, &geometry);
+ midx_included_packs(&include, &existing, midx_pack_names,
+ midx_pack_names_nr, &names, &geometry);
ret = write_midx_included_packs(&include, &geometry, &names,
refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL,
@@ -1570,6 +1720,9 @@ cleanup:
string_list_clear(&names, 1);
existing_packs_release(&existing);
free_pack_geometry(&geometry);
+ for (size_t i = 0; i < midx_pack_names_nr; i++)
+ free(midx_pack_names[i]);
+ free(midx_pack_names);
pack_objects_args_release(&po_args);
pack_objects_args_release(&cruft_po_args);