diff options
Diffstat (limited to 'path-walk.c')
-rw-r--r-- | path-walk.c | 613 |
1 files changed, 613 insertions, 0 deletions
diff --git a/path-walk.c b/path-walk.c new file mode 100644 index 0000000000..2d4ddbadd5 --- /dev/null +++ b/path-walk.c @@ -0,0 +1,613 @@ +/* + * path-walk.c: implementation for path-based walks of the object graph. + */ +#include "git-compat-util.h" +#include "path-walk.h" +#include "blob.h" +#include "commit.h" +#include "dir.h" +#include "hashmap.h" +#include "hex.h" +#include "list-objects.h" +#include "object.h" +#include "oid-array.h" +#include "prio-queue.h" +#include "repository.h" +#include "revision.h" +#include "string-list.h" +#include "strmap.h" +#include "tag.h" +#include "trace2.h" +#include "tree.h" +#include "tree-walk.h" + +static const char *root_path = ""; + +struct type_and_oid_list { + enum object_type type; + struct oid_array oids; + int maybe_interesting; +}; + +#define TYPE_AND_OID_LIST_INIT { \ + .type = OBJ_NONE, \ + .oids = OID_ARRAY_INIT \ +} + +struct path_walk_context { + /** + * Repeats of data in 'struct path_walk_info' for + * access with fewer characters. + */ + struct repository *repo; + struct rev_info *revs; + struct path_walk_info *info; + + /** + * Map a path to a 'struct type_and_oid_list' + * containing the objects discovered at that + * path. + */ + struct strmap paths_to_lists; + + /** + * Store the current list of paths in a priority queue, + * using object type as a sorting mechanism, mostly to + * make sure blobs are popped off the stack first. No + * other sort is made, so within each object type it acts + * like a stack and performs a DFS within the trees. + * + * Use path_stack_pushed to indicate whether a path + * was previously added to path_stack. + */ + struct prio_queue path_stack; + struct strset path_stack_pushed; +}; + +static int compare_by_type(const void *one, const void *two, void *cb_data) +{ + struct type_and_oid_list *list1, *list2; + const char *str1 = one; + const char *str2 = two; + struct path_walk_context *ctx = cb_data; + + list1 = strmap_get(&ctx->paths_to_lists, str1); + list2 = strmap_get(&ctx->paths_to_lists, str2); + + /* + * If object types are equal, then use path comparison. + */ + if (!list1 || !list2 || list1->type == list2->type) + return strcmp(str1, str2); + + /* Prefer tags to be popped off first. */ + if (list1->type == OBJ_TAG) + return -1; + if (list2->type == OBJ_TAG) + return 1; + + /* Prefer blobs to be popped off second. */ + if (list1->type == OBJ_BLOB) + return -1; + if (list2->type == OBJ_BLOB) + return 1; + + return 0; +} + +static void push_to_stack(struct path_walk_context *ctx, + const char *path) +{ + if (strset_contains(&ctx->path_stack_pushed, path)) + return; + + strset_add(&ctx->path_stack_pushed, path); + prio_queue_put(&ctx->path_stack, xstrdup(path)); +} + +static int add_tree_entries(struct path_walk_context *ctx, + const char *base_path, + struct object_id *oid) +{ + struct tree_desc desc; + struct name_entry entry; + struct strbuf path = STRBUF_INIT; + size_t base_len; + struct tree *tree = lookup_tree(ctx->repo, oid); + + if (!tree) { + error(_("failed to walk children of tree %s: not found"), + oid_to_hex(oid)); + return -1; + } else if (parse_tree_gently(tree, 1)) { + error("bad tree object %s", oid_to_hex(oid)); + return -1; + } + + strbuf_addstr(&path, base_path); + base_len = path.len; + + init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); + while (tree_entry(&desc, &entry)) { + struct type_and_oid_list *list; + struct object *o; + /* Not actually true, but we will ignore submodules later. */ + enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB; + + /* Skip submodules. */ + if (S_ISGITLINK(entry.mode)) + continue; + + /* If the caller doesn't want blobs, then don't bother. */ + if (!ctx->info->blobs && type == OBJ_BLOB) + continue; + + if (type == OBJ_TREE) { + struct tree *child = lookup_tree(ctx->repo, &entry.oid); + o = child ? &child->object : NULL; + } else if (type == OBJ_BLOB) { + struct blob *child = lookup_blob(ctx->repo, &entry.oid); + o = child ? &child->object : NULL; + } else { + BUG("invalid type for tree entry: %d", type); + } + + if (!o) { + error(_("failed to find object %s"), + oid_to_hex(&o->oid)); + return -1; + } + + /* Skip this object if already seen. */ + if (o->flags & SEEN) + continue; + o->flags |= SEEN; + + strbuf_setlen(&path, base_len); + strbuf_add(&path, entry.path, entry.pathlen); + + /* + * Trees will end with "/" for concatenation and distinction + * from blobs at the same path. + */ + if (type == OBJ_TREE) + strbuf_addch(&path, '/'); + + if (ctx->info->pl) { + int dtype; + enum pattern_match_result match; + match = path_matches_pattern_list(path.buf, path.len, + path.buf + base_len, &dtype, + ctx->info->pl, + ctx->repo->index); + + if (ctx->info->pl->use_cone_patterns && + match == NOT_MATCHED) + continue; + else if (!ctx->info->pl->use_cone_patterns && + type == OBJ_BLOB && + match != MATCHED) + continue; + } + + if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) { + CALLOC_ARRAY(list, 1); + list->type = type; + strmap_put(&ctx->paths_to_lists, path.buf, list); + } + push_to_stack(ctx, path.buf); + + if (!(o->flags & UNINTERESTING)) + list->maybe_interesting = 1; + + oid_array_append(&list->oids, &entry.oid); + } + + free_tree_buffer(tree); + strbuf_release(&path); + return 0; +} + +/* + * For each path in paths_to_explore, walk the trees another level + * and add any found blobs to the batch (but only if they exist and + * haven't been added yet). + */ +static int walk_path(struct path_walk_context *ctx, + const char *path) +{ + struct type_and_oid_list *list; + int ret = 0; + + list = strmap_get(&ctx->paths_to_lists, path); + + if (!list) + BUG("provided path '%s' that had no associated list", path); + + if (!list->oids.nr) + return 0; + + if (ctx->info->prune_all_uninteresting) { + /* + * This is true if all objects were UNINTERESTING + * when added to the list. + */ + if (!list->maybe_interesting) + return 0; + + /* + * But it's still possible that the objects were set + * as UNINTERESTING after being added. Do a quick check. + */ + list->maybe_interesting = 0; + for (size_t i = 0; + !list->maybe_interesting && i < list->oids.nr; + i++) { + if (list->type == OBJ_TREE) { + struct tree *t = lookup_tree(ctx->repo, + &list->oids.oid[i]); + if (t && !(t->object.flags & UNINTERESTING)) + list->maybe_interesting = 1; + } else if (list->type == OBJ_BLOB) { + struct blob *b = lookup_blob(ctx->repo, + &list->oids.oid[i]); + if (b && !(b->object.flags & UNINTERESTING)) + list->maybe_interesting = 1; + } else { + /* Tags are always interesting if visited. */ + list->maybe_interesting = 1; + } + } + + /* We have confirmed that all objects are UNINTERESTING. */ + if (!list->maybe_interesting) + return 0; + } + + /* Evaluate function pointer on this data, if requested. */ + if ((list->type == OBJ_TREE && ctx->info->trees) || + (list->type == OBJ_BLOB && ctx->info->blobs) || + (list->type == OBJ_TAG && ctx->info->tags)) + ret = ctx->info->path_fn(path, &list->oids, list->type, + ctx->info->path_fn_data); + + /* Expand data for children. */ + if (list->type == OBJ_TREE) { + for (size_t i = 0; i < list->oids.nr; i++) { + ret |= add_tree_entries(ctx, + path, + &list->oids.oid[i]); + } + } + + oid_array_clear(&list->oids); + strmap_remove(&ctx->paths_to_lists, path, 1); + return ret; +} + +static void clear_paths_to_lists(struct strmap *map) +{ + struct hashmap_iter iter; + struct strmap_entry *e; + + hashmap_for_each_entry(&map->map, &iter, e, ent) { + struct type_and_oid_list *list = e->value; + oid_array_clear(&list->oids); + } + strmap_clear(map, 1); + strmap_init(map); +} + +static struct repository *edge_repo; +static struct type_and_oid_list *edge_tree_list; + +static void show_edge(struct commit *commit) +{ + struct tree *t = repo_get_commit_tree(edge_repo, commit); + + if (!t) + return; + + if (commit->object.flags & UNINTERESTING) + t->object.flags |= UNINTERESTING; + + if (t->object.flags & SEEN) + return; + t->object.flags |= SEEN; + + oid_array_append(&edge_tree_list->oids, &t->object.oid); +} + +static int setup_pending_objects(struct path_walk_info *info, + struct path_walk_context *ctx) +{ + struct type_and_oid_list *tags = NULL; + struct type_and_oid_list *tagged_blobs = NULL; + struct type_and_oid_list *root_tree_list = NULL; + + if (info->tags) + CALLOC_ARRAY(tags, 1); + if (info->blobs) + CALLOC_ARRAY(tagged_blobs, 1); + if (info->trees) + root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); + + /* + * Pending objects include: + * * Commits at branch tips. + * * Annotated tags at tag tips. + * * Any kind of object at lightweight tag tips. + * * Trees and blobs in the index (with an associated path). + */ + for (size_t i = 0; i < info->revs->pending.nr; i++) { + struct object_array_entry *pending = info->revs->pending.objects + i; + struct object *obj = pending->item; + + /* Commits will be picked up by revision walk. */ + if (obj->type == OBJ_COMMIT) + continue; + + /* Navigate annotated tag object chains. */ + while (obj->type == OBJ_TAG) { + struct tag *tag = lookup_tag(info->revs->repo, &obj->oid); + if (!tag) { + error(_("failed to find tag %s"), + oid_to_hex(&obj->oid)); + return -1; + } + if (tag->object.flags & SEEN) + break; + tag->object.flags |= SEEN; + + if (tags) + oid_array_append(&tags->oids, &obj->oid); + obj = tag->tagged; + } + + if (obj->type == OBJ_TAG) + continue; + + /* We are now at a non-tag object. */ + if (obj->flags & SEEN) + continue; + obj->flags |= SEEN; + + switch (obj->type) { + case OBJ_TREE: + if (!info->trees) + continue; + if (pending->path) { + struct type_and_oid_list *list; + char *path = *pending->path ? xstrfmt("%s/", pending->path) + : xstrdup(""); + if (!(list = strmap_get(&ctx->paths_to_lists, path))) { + CALLOC_ARRAY(list, 1); + list->type = OBJ_TREE; + strmap_put(&ctx->paths_to_lists, path, list); + } + oid_array_append(&list->oids, &obj->oid); + free(path); + } else { + /* assume a root tree, such as a lightweight tag. */ + oid_array_append(&root_tree_list->oids, &obj->oid); + } + break; + + case OBJ_BLOB: + if (!info->blobs) + continue; + if (pending->path) { + struct type_and_oid_list *list; + char *path = pending->path; + if (!(list = strmap_get(&ctx->paths_to_lists, path))) { + CALLOC_ARRAY(list, 1); + list->type = OBJ_BLOB; + strmap_put(&ctx->paths_to_lists, path, list); + } + oid_array_append(&list->oids, &obj->oid); + } else { + /* assume a root tree, such as a lightweight tag. */ + oid_array_append(&tagged_blobs->oids, &obj->oid); + } + break; + + case OBJ_COMMIT: + /* Make sure it is in the object walk */ + if (obj != pending->item) + add_pending_object(info->revs, obj, ""); + break; + + default: + BUG("should not see any other type here"); + } + } + + /* + * Add tag objects and tagged blobs if they exist. + */ + if (tagged_blobs) { + if (tagged_blobs->oids.nr) { + const char *tagged_blob_path = "/tagged-blobs"; + tagged_blobs->type = OBJ_BLOB; + tagged_blobs->maybe_interesting = 1; + strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs); + push_to_stack(ctx, tagged_blob_path); + } else { + oid_array_clear(&tagged_blobs->oids); + free(tagged_blobs); + } + } + if (tags) { + if (tags->oids.nr) { + const char *tag_path = "/tags"; + tags->type = OBJ_TAG; + tags->maybe_interesting = 1; + strmap_put(&ctx->paths_to_lists, tag_path, tags); + push_to_stack(ctx, tag_path); + } else { + oid_array_clear(&tags->oids); + free(tags); + } + } + + return 0; +} + +/** + * Given the configuration of 'info', walk the commits based on 'info->revs' and + * call 'info->path_fn' on each discovered path. + * + * Returns nonzero on an error. + */ +int walk_objects_by_path(struct path_walk_info *info) +{ + int ret; + size_t commits_nr = 0, paths_nr = 0; + struct commit *c; + struct type_and_oid_list *root_tree_list; + struct type_and_oid_list *commit_list; + struct path_walk_context ctx = { + .repo = info->revs->repo, + .revs = info->revs, + .info = info, + .path_stack = { + .compare = compare_by_type, + .cb_data = &ctx + }, + .path_stack_pushed = STRSET_INIT, + .paths_to_lists = STRMAP_INIT + }; + + trace2_region_enter("path-walk", "commit-walk", info->revs->repo); + + CALLOC_ARRAY(commit_list, 1); + commit_list->type = OBJ_COMMIT; + + if (info->tags) + info->revs->tag_objects = 1; + + /* Insert a single list for the root tree into the paths. */ + CALLOC_ARRAY(root_tree_list, 1); + root_tree_list->type = OBJ_TREE; + root_tree_list->maybe_interesting = 1; + strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); + push_to_stack(&ctx, root_path); + + /* + * Set these values before preparing the walk to catch + * lightweight tags pointing to non-commits and indexed objects. + */ + info->revs->blob_objects = info->blobs; + info->revs->tree_objects = info->trees; + + if (prepare_revision_walk(info->revs)) + die(_("failed to setup revision walk")); + + /* + * Walk trees to mark them as UNINTERESTING. + * This is particularly important when 'edge_aggressive' is set. + */ + info->revs->edge_hint_aggressive = info->edge_aggressive; + edge_repo = info->revs->repo; + edge_tree_list = root_tree_list; + mark_edges_uninteresting(info->revs, show_edge, + info->prune_all_uninteresting); + edge_repo = NULL; + edge_tree_list = NULL; + + info->revs->blob_objects = info->revs->tree_objects = 0; + + trace2_region_enter("path-walk", "pending-walk", info->revs->repo); + ret = setup_pending_objects(info, &ctx); + trace2_region_leave("path-walk", "pending-walk", info->revs->repo); + + if (ret) + return ret; + + while ((c = get_revision(info->revs))) { + struct object_id *oid; + struct tree *t; + commits_nr++; + + if (info->commits) + oid_array_append(&commit_list->oids, + &c->object.oid); + + /* If we only care about commits, then skip trees. */ + if (!info->trees && !info->blobs) + continue; + + oid = get_commit_tree_oid(c); + t = lookup_tree(info->revs->repo, oid); + + if (!t) { + error("could not find tree %s", oid_to_hex(oid)); + return -1; + } + + if (t->object.flags & SEEN) + continue; + t->object.flags |= SEEN; + oid_array_append(&root_tree_list->oids, oid); + } + + trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); + trace2_region_leave("path-walk", "commit-walk", info->revs->repo); + + /* Track all commits. */ + if (info->commits && commit_list->oids.nr) + ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT, + info->path_fn_data); + oid_array_clear(&commit_list->oids); + free(commit_list); + + trace2_region_enter("path-walk", "path-walk", info->revs->repo); + while (!ret && ctx.path_stack.nr) { + char *path = prio_queue_get(&ctx.path_stack); + paths_nr++; + + ret = walk_path(&ctx, path); + + free(path); + } + + /* Are there paths remaining? Likely they are from indexed objects. */ + if (!strmap_empty(&ctx.paths_to_lists)) { + struct hashmap_iter iter; + struct strmap_entry *entry; + + strmap_for_each_entry(&ctx.paths_to_lists, &iter, entry) + push_to_stack(&ctx, entry->key); + + while (!ret && ctx.path_stack.nr) { + char *path = prio_queue_get(&ctx.path_stack); + paths_nr++; + + ret = walk_path(&ctx, path); + + free(path); + } + } + + trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); + trace2_region_leave("path-walk", "path-walk", info->revs->repo); + + clear_paths_to_lists(&ctx.paths_to_lists); + strset_clear(&ctx.path_stack_pushed); + clear_prio_queue(&ctx.path_stack); + return ret; +} + +void path_walk_info_init(struct path_walk_info *info) +{ + struct path_walk_info empty = PATH_WALK_INFO_INIT; + memcpy(info, &empty, sizeof(empty)); +} + +void path_walk_info_clear(struct path_walk_info *info) +{ + if (info->pl) { + clear_pattern_list(info->pl); + free(info->pl); + } +} |