diff options
Diffstat (limited to 'builtin/last-modified.c')
| -rw-r--r-- | builtin/last-modified.c | 547 |
1 files changed, 547 insertions, 0 deletions
diff --git a/builtin/last-modified.c b/builtin/last-modified.c new file mode 100644 index 0000000000..b0ecbdc540 --- /dev/null +++ b/builtin/last-modified.c @@ -0,0 +1,547 @@ +#include "git-compat-util.h" +#include "bloom.h" +#include "builtin.h" +#include "commit-graph.h" +#include "commit-slab.h" +#include "commit.h" +#include "config.h" +#include "diff.h" +#include "diffcore.h" +#include "environment.h" +#include "ewah/ewok.h" +#include "hashmap.h" +#include "hex.h" +#include "object-name.h" +#include "object.h" +#include "parse-options.h" +#include "prio-queue.h" +#include "quote.h" +#include "repository.h" +#include "revision.h" + +/* Remember to update object flag allocation in object.h */ +#define PARENT1 (1u<<16) /* used instead of SEEN */ +#define PARENT2 (1u<<17) /* used instead of BOTTOM, BOUNDARY */ + +struct last_modified_entry { + struct hashmap_entry hashent; + struct object_id oid; + struct bloom_key key; + size_t diff_idx; + const char path[FLEX_ARRAY]; +}; + +static int last_modified_entry_hashcmp(const void *unused UNUSED, + const struct hashmap_entry *hent1, + const struct hashmap_entry *hent2, + const void *path) +{ + const struct last_modified_entry *ent1 = + container_of(hent1, const struct last_modified_entry, hashent); + const struct last_modified_entry *ent2 = + container_of(hent2, const struct last_modified_entry, hashent); + return strcmp(ent1->path, path ? path : ent2->path); +} + +/* + * Hold a bitmap for each commit we're working with. In the bitmap, each bit + * represents a path in `lm->all_paths`. An active bit indicates the path still + * needs to be associated to a commit. + */ +define_commit_slab(active_paths_for_commit, struct bitmap *); + +struct last_modified { + struct hashmap paths; + struct rev_info rev; + bool recursive; + bool show_trees; + + const char **all_paths; + size_t all_paths_nr; + struct active_paths_for_commit active_paths; + + /* 'scratch' to avoid allocating a bitmap every process_parent() */ + struct bitmap *scratch; +}; + +static struct bitmap *active_paths_for(struct last_modified *lm, struct commit *c) +{ + struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c); + if (!*bitmap) + *bitmap = bitmap_word_alloc(lm->all_paths_nr / BITS_IN_EWORD + 1); + + return *bitmap; +} + +static void active_paths_free(struct last_modified *lm, struct commit *c) +{ + struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c); + if (*bitmap) { + bitmap_free(*bitmap); + *bitmap = NULL; + } +} + +static void last_modified_release(struct last_modified *lm) +{ + struct hashmap_iter iter; + struct last_modified_entry *ent; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) + bloom_key_clear(&ent->key); + + hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); + release_revisions(&lm->rev); + + free(lm->all_paths); +} + +struct last_modified_callback_data { + struct last_modified *lm; + struct commit *commit; +}; + +static void add_path_from_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *data) +{ + struct last_modified *lm = data; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + struct last_modified_entry *ent; + const char *path = p->two->path; + + FLEX_ALLOC_STR(ent, path, path); + oidcpy(&ent->oid, &p->two->oid); + if (lm->rev.bloom_filter_settings) + bloom_key_fill(&ent->key, path, strlen(path), + lm->rev.bloom_filter_settings); + hashmap_entry_init(&ent->hashent, strhash(ent->path)); + hashmap_add(&lm->paths, &ent->hashent); + } +} + +static int populate_paths_from_revs(struct last_modified *lm) +{ + int num_interesting = 0; + struct diff_options diffopt; + + /* + * Create a copy of `struct diff_options`. In this copy a callback is + * set that when called adds entries to `paths` in `struct last_modified`. + * This copy is used to diff the tree of the target revision against an + * empty tree. This results in all paths in the target revision being + * listed. After `paths` is populated, we don't need this copy no more. + */ + memcpy(&diffopt, &lm->rev.diffopt, sizeof(diffopt)); + copy_pathspec(&diffopt.pathspec, &lm->rev.diffopt.pathspec); + diffopt.output_format = DIFF_FORMAT_CALLBACK; + diffopt.format_callback = add_path_from_diff; + diffopt.format_callback_data = lm; + + for (size_t i = 0; i < lm->rev.pending.nr; i++) { + struct object_array_entry *obj = lm->rev.pending.objects + i; + + if (obj->item->flags & UNINTERESTING) + continue; + + if (num_interesting++) + return error(_("last-modified can only operate on one tree at a time")); + + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &obj->item->oid, "", &diffopt); + diff_flush(&diffopt); + } + clear_pathspec(&diffopt.pathspec); + + return 0; +} + +static void last_modified_emit(struct last_modified *lm, + const char *path, const struct commit *commit) + +{ + if (commit->object.flags & BOUNDARY) + putchar('^'); + printf("%s\t", oid_to_hex(&commit->object.oid)); + + if (lm->rev.diffopt.line_termination) + write_name_quoted(path, stdout, '\n'); + else + printf("%s%c", path, '\0'); +} + +static void mark_path(const char *path, const struct object_id *oid, + struct last_modified_callback_data *data) +{ + struct last_modified_entry *ent; + + /* Is it even a path that we are interested in? */ + ent = hashmap_get_entry_from_hash(&data->lm->paths, strhash(path), path, + struct last_modified_entry, hashent); + if (!ent) + return; + + /* + * Is it arriving at a version of interest, or is it from a side branch + * which did not contribute to the final state? + */ + if (oid && !oideq(oid, &ent->oid)) + return; + + last_modified_emit(data->lm, path, data->commit); + + hashmap_remove(&data->lm->paths, &ent->hashent, path); + bloom_key_clear(&ent->key); + free(ent); +} + +static void last_modified_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *cbdata) +{ + struct last_modified_callback_data *data = cbdata; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + switch (p->status) { + case DIFF_STATUS_DELETED: + /* + * There's no point in feeding a deletion, as it could + * not have resulted in our current state, which + * actually has the file. + */ + break; + + default: + /* + * Otherwise, we care only that we somehow arrived at + * a final oid state. Note that this covers some + * potentially controversial areas, including: + * + * 1. A rename or copy will be found, as it is the + * first time the content has arrived at the given + * path. + * + * 2. Even a non-content modification like a mode or + * type change will trigger it. + * + * We take the inclusive approach for now, and find + * anything which impacts the path. Options to tweak + * the behavior (e.g., to "--follow" the content across + * renames) can come later. + */ + mark_path(p->two->path, &p->two->oid, data); + break; + } + } +} + +static void pass_to_parent(struct bitmap *c, + struct bitmap *p, + size_t pos) +{ + bitmap_unset(c, pos); + bitmap_set(p, pos); +} + +static bool maybe_changed_path(struct last_modified *lm, + struct commit *origin, + struct bitmap *active) +{ + struct bloom_filter *filter; + struct last_modified_entry *ent; + struct hashmap_iter iter; + + if (!lm->rev.bloom_filter_settings) + return true; + + if (commit_graph_generation(origin) == GENERATION_NUMBER_INFINITY) + return true; + + filter = get_bloom_filter(lm->rev.repo, origin); + if (!filter) + return true; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + if (active && !bitmap_get(active, ent->diff_idx)) + continue; + + if (bloom_filter_contains(filter, &ent->key, + lm->rev.bloom_filter_settings)) + return true; + } + return false; +} + +static void process_parent(struct last_modified *lm, + struct prio_queue *queue, + struct commit *c, struct bitmap *active_c, + struct commit *parent, int parent_i) +{ + struct bitmap *active_p; + + repo_parse_commit(lm->rev.repo, parent); + active_p = active_paths_for(lm, parent); + + /* + * The first time entering this function for this commit (i.e. first parent) + * see if Bloom filters will tell us it's worth to do the diff. + */ + if (parent_i || maybe_changed_path(lm, c, active_c)) { + diff_tree_oid(&parent->object.oid, + &c->object.oid, "", &lm->rev.diffopt); + diffcore_std(&lm->rev.diffopt); + } + + /* + * Test each path for TREESAME-ness against the parent. If a path is + * TREESAME, pass it on to this parent. + * + * First, collect all paths that are *not* TREESAME in 'scratch'. + * Then, pass paths that *are* TREESAME and active to the parent. + */ + for (int i = 0; i < diff_queued_diff.nr; i++) { + struct diff_filepair *fp = diff_queued_diff.queue[i]; + const char *path = fp->two->path; + struct last_modified_entry *ent = + hashmap_get_entry_from_hash(&lm->paths, strhash(path), path, + struct last_modified_entry, hashent); + if (ent) { + size_t k = ent->diff_idx; + if (bitmap_get(active_c, k)) + bitmap_set(lm->scratch, k); + } + } + for (size_t i = 0; i < lm->all_paths_nr; i++) { + if (bitmap_get(active_c, i) && !bitmap_get(lm->scratch, i)) + pass_to_parent(active_c, active_p, i); + } + + /* + * If parent has any active paths, put it on the queue (if not already). + */ + if (!bitmap_is_empty(active_p) && !(parent->object.flags & PARENT1)) { + parent->object.flags |= PARENT1; + prio_queue_put(queue, parent); + } + if (!(parent->object.flags & PARENT1)) + active_paths_free(lm, parent); + + memset(lm->scratch->words, 0x0, lm->scratch->word_alloc); + diff_queue_clear(&diff_queued_diff); +} + +static int last_modified_run(struct last_modified *lm) +{ + int max_count, queue_popped = 0; + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; + struct prio_queue not_queue = { compare_commits_by_gen_then_commit_date }; + struct commit_list *list; + struct last_modified_callback_data data = { .lm = lm }; + + lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; + lm->rev.diffopt.format_callback = last_modified_diff; + lm->rev.diffopt.format_callback_data = &data; + lm->rev.no_walk = 1; + + prepare_revision_walk(&lm->rev); + + max_count = lm->rev.max_count; + + init_active_paths_for_commit(&lm->active_paths); + lm->scratch = bitmap_word_alloc(lm->all_paths_nr); + + /* + * lm->rev.commits holds the set of boundary commits for our walk. + * + * Loop through each such commit, and place it in the appropriate queue. + */ + for (list = lm->rev.commits; list; list = list->next) { + struct commit *c = list->item; + + if (c->object.flags & BOTTOM) { + prio_queue_put(¬_queue, c); + c->object.flags |= PARENT2; + } else if (!(c->object.flags & PARENT1)) { + /* + * If the commit is a starting point (and hasn't been + * seen yet), then initialize the set of interesting + * paths, too. + */ + struct bitmap *active; + + prio_queue_put(&queue, c); + c->object.flags |= PARENT1; + + active = active_paths_for(lm, c); + for (size_t i = 0; i < lm->all_paths_nr; i++) + bitmap_set(active, i); + } + } + + while (queue.nr) { + int parent_i; + struct commit_list *p; + struct commit *c = prio_queue_get(&queue); + struct bitmap *active_c = active_paths_for(lm, c); + + if ((0 <= max_count && max_count < ++queue_popped) || + (c->object.flags & PARENT2)) { + /* + * Either a boundary commit, or we have already seen too + * many others. Either way, stop here. + */ + c->object.flags |= PARENT2 | BOUNDARY; + data.commit = c; + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &c->object.oid, + "", &lm->rev.diffopt); + diff_flush(&lm->rev.diffopt); + goto cleanup; + } + + /* + * Otherwise, make sure that 'c' isn't reachable from anything + * in the '--not' queue. + */ + repo_parse_commit(lm->rev.repo, c); + + while (not_queue.nr) { + struct commit_list *np; + struct commit *n = prio_queue_get(¬_queue); + + repo_parse_commit(lm->rev.repo, n); + + for (np = n->parents; np; np = np->next) { + if (!(np->item->object.flags & PARENT2)) { + prio_queue_put(¬_queue, np->item); + np->item->object.flags |= PARENT2; + } + } + + if (commit_graph_generation(n) < commit_graph_generation(c)) + break; + } + + /* + * Look at each parent and pass on each path that's TREESAME + * with that parent. Stop early when no active paths remain. + */ + for (p = c->parents, parent_i = 0; p; p = p->next, parent_i++) { + process_parent(lm, &queue, + c, active_c, + p->item, parent_i); + + if (bitmap_is_empty(active_c)) + break; + } + + /* + * Paths that remain active, or not TREESAME with any parent, + * were changed by 'c'. + */ + if (!bitmap_is_empty(active_c)) { + data.commit = c; + for (size_t i = 0; i < lm->all_paths_nr; i++) { + if (bitmap_get(active_c, i)) + mark_path(lm->all_paths[i], NULL, &data); + } + } + +cleanup: + active_paths_free(lm, c); + } + + if (hashmap_get_size(&lm->paths)) + BUG("paths remaining beyond boundary in last-modified"); + + clear_prio_queue(¬_queue); + clear_prio_queue(&queue); + clear_active_paths_for_commit(&lm->active_paths); + bitmap_free(lm->scratch); + + return 0; +} + +static int last_modified_init(struct last_modified *lm, struct repository *r, + const char *prefix, int argc, const char **argv) +{ + struct hashmap_iter iter; + struct last_modified_entry *ent; + + hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); + + repo_init_revisions(r, &lm->rev, prefix); + lm->rev.def = "HEAD"; + lm->rev.combine_merges = 1; + lm->rev.show_root_diff = 1; + lm->rev.boundary = 1; + lm->rev.no_commit_id = 1; + lm->rev.diff = 1; + lm->rev.diffopt.flags.no_recursive_diff_tree_combined = 1; + lm->rev.diffopt.flags.recursive = lm->recursive; + lm->rev.diffopt.flags.tree_in_recursive = lm->show_trees; + + argc = setup_revisions(argc, argv, &lm->rev, NULL); + if (argc > 1) { + error(_("unknown last-modified argument: %s"), argv[1]); + return argc; + } + + lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo); + + if (populate_paths_from_revs(lm) < 0) + return error(_("unable to setup last-modified")); + + CALLOC_ARRAY(lm->all_paths, hashmap_get_size(&lm->paths)); + lm->all_paths_nr = 0; + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + ent->diff_idx = lm->all_paths_nr++; + lm->all_paths[ent->diff_idx] = ent->path; + } + + return 0; +} + +int cmd_last_modified(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + int ret; + struct last_modified lm = { 0 }; + + const char * const last_modified_usage[] = { + N_("git last-modified [--recursive] [--show-trees] " + "[<revision-range>] [[--] <path>...]"), + NULL + }; + + struct option last_modified_options[] = { + OPT_BOOL('r', "recursive", &lm.recursive, + N_("recurse into subtrees")), + OPT_BOOL('t', "show-trees", &lm.show_trees, + N_("show tree entries when recursing into subtrees")), + OPT_END() + }; + + argc = parse_options(argc, argv, prefix, last_modified_options, + last_modified_usage, + PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); + + repo_config(repo, git_default_config, NULL); + + ret = last_modified_init(&lm, repo, prefix, argc, argv); + if (ret > 0) + usage_with_options(last_modified_usage, + last_modified_options); + if (ret) + goto out; + + ret = last_modified_run(&lm); + if (ret) + goto out; + +out: + last_modified_release(&lm); + + return ret; +} |
