| #include "git-compat-util.h" |
| #include "bloom.h" |
| #include "builtin.h" |
| #include "commit-graph.h" |
| #include "commit-slab.h" |
| #include "commit.h" |
| #include "config.h" |
| #include "diff.h" |
| #include "diffcore.h" |
| #include "environment.h" |
| #include "ewah/ewok.h" |
| #include "hashmap.h" |
| #include "hex.h" |
| #include "object-name.h" |
| #include "object.h" |
| #include "parse-options.h" |
| #include "prio-queue.h" |
| #include "quote.h" |
| #include "repository.h" |
| #include "revision.h" |
| |
| /* Remember to update object flag allocation in object.h */ |
| #define PARENT1 (1u<<16) /* used instead of SEEN */ |
| #define PARENT2 (1u<<17) /* used instead of BOTTOM, BOUNDARY */ |
| |
| struct last_modified_entry { |
| struct hashmap_entry hashent; |
| struct object_id oid; |
| struct bloom_key key; |
| size_t diff_idx; |
| const char path[FLEX_ARRAY]; |
| }; |
| |
| static int last_modified_entry_hashcmp(const void *unused UNUSED, |
| const struct hashmap_entry *hent1, |
| const struct hashmap_entry *hent2, |
| const void *path) |
| { |
| const struct last_modified_entry *ent1 = |
| container_of(hent1, const struct last_modified_entry, hashent); |
| const struct last_modified_entry *ent2 = |
| container_of(hent2, const struct last_modified_entry, hashent); |
| return strcmp(ent1->path, path ? path : ent2->path); |
| } |
| |
| /* |
| * Hold a bitmap for each commit we're working with. In the bitmap, each bit |
| * represents a path in `lm->all_paths`. An active bit indicates the path still |
| * needs to be associated to a commit. |
| */ |
| define_commit_slab(active_paths_for_commit, struct bitmap *); |
| |
| struct last_modified { |
| struct hashmap paths; |
| struct rev_info rev; |
| bool recursive; |
| bool show_trees; |
| |
| const char **all_paths; |
| size_t all_paths_nr; |
| struct active_paths_for_commit active_paths; |
| |
| /* 'scratch' to avoid allocating a bitmap every process_parent() */ |
| struct bitmap *scratch; |
| }; |
| |
| static struct bitmap *active_paths_for(struct last_modified *lm, struct commit *c) |
| { |
| struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c); |
| if (!*bitmap) |
| *bitmap = bitmap_word_alloc(lm->all_paths_nr / BITS_IN_EWORD + 1); |
| |
| return *bitmap; |
| } |
| |
| static void active_paths_free(struct last_modified *lm, struct commit *c) |
| { |
| struct bitmap **bitmap = active_paths_for_commit_at(&lm->active_paths, c); |
| if (*bitmap) { |
| bitmap_free(*bitmap); |
| *bitmap = NULL; |
| } |
| } |
| |
| static void last_modified_release(struct last_modified *lm) |
| { |
| struct hashmap_iter iter; |
| struct last_modified_entry *ent; |
| |
| hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) |
| bloom_key_clear(&ent->key); |
| |
| hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); |
| release_revisions(&lm->rev); |
| |
| free(lm->all_paths); |
| } |
| |
| struct last_modified_callback_data { |
| struct last_modified *lm; |
| struct commit *commit; |
| }; |
| |
| static void add_path_from_diff(struct diff_queue_struct *q, |
| struct diff_options *opt UNUSED, void *data) |
| { |
| struct last_modified *lm = data; |
| |
| for (int i = 0; i < q->nr; i++) { |
| struct diff_filepair *p = q->queue[i]; |
| struct last_modified_entry *ent; |
| const char *path = p->two->path; |
| |
| FLEX_ALLOC_STR(ent, path, path); |
| oidcpy(&ent->oid, &p->two->oid); |
| if (lm->rev.bloom_filter_settings) |
| bloom_key_fill(&ent->key, path, strlen(path), |
| lm->rev.bloom_filter_settings); |
| hashmap_entry_init(&ent->hashent, strhash(ent->path)); |
| hashmap_add(&lm->paths, &ent->hashent); |
| } |
| } |
| |
| static int populate_paths_from_revs(struct last_modified *lm) |
| { |
| int num_interesting = 0; |
| struct diff_options diffopt; |
| |
| /* |
| * Create a copy of `struct diff_options`. In this copy a callback is |
| * set that when called adds entries to `paths` in `struct last_modified`. |
| * This copy is used to diff the tree of the target revision against an |
| * empty tree. This results in all paths in the target revision being |
| * listed. After `paths` is populated, we don't need this copy no more. |
| */ |
| memcpy(&diffopt, &lm->rev.diffopt, sizeof(diffopt)); |
| copy_pathspec(&diffopt.pathspec, &lm->rev.diffopt.pathspec); |
| diffopt.output_format = DIFF_FORMAT_CALLBACK; |
| diffopt.format_callback = add_path_from_diff; |
| diffopt.format_callback_data = lm; |
| |
| for (size_t i = 0; i < lm->rev.pending.nr; i++) { |
| struct object_array_entry *obj = lm->rev.pending.objects + i; |
| |
| if (obj->item->flags & UNINTERESTING) |
| continue; |
| |
| if (num_interesting++) |
| return error(_("last-modified can only operate on one tree at a time")); |
| |
| diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, |
| &obj->item->oid, "", &diffopt); |
| diff_flush(&diffopt); |
| } |
| clear_pathspec(&diffopt.pathspec); |
| |
| return 0; |
| } |
| |
| static void last_modified_emit(struct last_modified *lm, |
| const char *path, const struct commit *commit) |
| |
| { |
| if (commit->object.flags & BOUNDARY) |
| putchar('^'); |
| printf("%s\t", oid_to_hex(&commit->object.oid)); |
| |
| if (lm->rev.diffopt.line_termination) |
| write_name_quoted(path, stdout, '\n'); |
| else |
| printf("%s%c", path, '\0'); |
| } |
| |
| static void mark_path(const char *path, const struct object_id *oid, |
| struct last_modified_callback_data *data) |
| { |
| struct last_modified_entry *ent; |
| |
| /* Is it even a path that we are interested in? */ |
| ent = hashmap_get_entry_from_hash(&data->lm->paths, strhash(path), path, |
| struct last_modified_entry, hashent); |
| if (!ent) |
| return; |
| |
| /* |
| * Is it arriving at a version of interest, or is it from a side branch |
| * which did not contribute to the final state? |
| */ |
| if (oid && !oideq(oid, &ent->oid)) |
| return; |
| |
| last_modified_emit(data->lm, path, data->commit); |
| |
| hashmap_remove(&data->lm->paths, &ent->hashent, path); |
| bloom_key_clear(&ent->key); |
| free(ent); |
| } |
| |
| static void last_modified_diff(struct diff_queue_struct *q, |
| struct diff_options *opt UNUSED, void *cbdata) |
| { |
| struct last_modified_callback_data *data = cbdata; |
| |
| for (int i = 0; i < q->nr; i++) { |
| struct diff_filepair *p = q->queue[i]; |
| switch (p->status) { |
| case DIFF_STATUS_DELETED: |
| /* |
| * There's no point in feeding a deletion, as it could |
| * not have resulted in our current state, which |
| * actually has the file. |
| */ |
| break; |
| |
| default: |
| /* |
| * Otherwise, we care only that we somehow arrived at |
| * a final oid state. Note that this covers some |
| * potentially controversial areas, including: |
| * |
| * 1. A rename or copy will be found, as it is the |
| * first time the content has arrived at the given |
| * path. |
| * |
| * 2. Even a non-content modification like a mode or |
| * type change will trigger it. |
| * |
| * We take the inclusive approach for now, and find |
| * anything which impacts the path. Options to tweak |
| * the behavior (e.g., to "--follow" the content across |
| * renames) can come later. |
| */ |
| mark_path(p->two->path, &p->two->oid, data); |
| break; |
| } |
| } |
| } |
| |
| static void pass_to_parent(struct bitmap *c, |
| struct bitmap *p, |
| size_t pos) |
| { |
| bitmap_unset(c, pos); |
| bitmap_set(p, pos); |
| } |
| |
| static bool maybe_changed_path(struct last_modified *lm, |
| struct commit *origin, |
| struct bitmap *active) |
| { |
| struct bloom_filter *filter; |
| struct last_modified_entry *ent; |
| struct hashmap_iter iter; |
| |
| if (!lm->rev.bloom_filter_settings) |
| return true; |
| |
| if (commit_graph_generation(origin) == GENERATION_NUMBER_INFINITY) |
| return true; |
| |
| filter = get_bloom_filter(lm->rev.repo, origin); |
| if (!filter) |
| return true; |
| |
| hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { |
| if (active && !bitmap_get(active, ent->diff_idx)) |
| continue; |
| |
| if (bloom_filter_contains(filter, &ent->key, |
| lm->rev.bloom_filter_settings)) |
| return true; |
| } |
| return false; |
| } |
| |
| static void process_parent(struct last_modified *lm, |
| struct prio_queue *queue, |
| struct commit *c, struct bitmap *active_c, |
| struct commit *parent, int parent_i) |
| { |
| struct bitmap *active_p; |
| |
| repo_parse_commit(lm->rev.repo, parent); |
| active_p = active_paths_for(lm, parent); |
| |
| /* |
| * The first time entering this function for this commit (i.e. first parent) |
| * see if Bloom filters will tell us it's worth to do the diff. |
| */ |
| if (parent_i || maybe_changed_path(lm, c, active_c)) { |
| diff_tree_oid(&parent->object.oid, |
| &c->object.oid, "", &lm->rev.diffopt); |
| diffcore_std(&lm->rev.diffopt); |
| } |
| |
| /* |
| * Test each path for TREESAME-ness against the parent. If a path is |
| * TREESAME, pass it on to this parent. |
| * |
| * First, collect all paths that are *not* TREESAME in 'scratch'. |
| * Then, pass paths that *are* TREESAME and active to the parent. |
| */ |
| for (int i = 0; i < diff_queued_diff.nr; i++) { |
| struct diff_filepair *fp = diff_queued_diff.queue[i]; |
| const char *path = fp->two->path; |
| struct last_modified_entry *ent = |
| hashmap_get_entry_from_hash(&lm->paths, strhash(path), path, |
| struct last_modified_entry, hashent); |
| if (ent) { |
| size_t k = ent->diff_idx; |
| if (bitmap_get(active_c, k)) |
| bitmap_set(lm->scratch, k); |
| } |
| } |
| for (size_t i = 0; i < lm->all_paths_nr; i++) { |
| if (bitmap_get(active_c, i) && !bitmap_get(lm->scratch, i)) |
| pass_to_parent(active_c, active_p, i); |
| } |
| |
| /* |
| * If parent has any active paths, put it on the queue (if not already). |
| */ |
| if (!bitmap_is_empty(active_p) && !(parent->object.flags & PARENT1)) { |
| parent->object.flags |= PARENT1; |
| prio_queue_put(queue, parent); |
| } |
| if (!(parent->object.flags & PARENT1)) |
| active_paths_free(lm, parent); |
| |
| memset(lm->scratch->words, 0x0, lm->scratch->word_alloc); |
| diff_queue_clear(&diff_queued_diff); |
| } |
| |
| static int last_modified_run(struct last_modified *lm) |
| { |
| int max_count, queue_popped = 0; |
| struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; |
| struct prio_queue not_queue = { compare_commits_by_gen_then_commit_date }; |
| struct commit_list *list; |
| struct last_modified_callback_data data = { .lm = lm }; |
| |
| lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; |
| lm->rev.diffopt.format_callback = last_modified_diff; |
| lm->rev.diffopt.format_callback_data = &data; |
| lm->rev.no_walk = 1; |
| |
| prepare_revision_walk(&lm->rev); |
| |
| max_count = lm->rev.max_count; |
| |
| init_active_paths_for_commit(&lm->active_paths); |
| lm->scratch = bitmap_word_alloc(lm->all_paths_nr); |
| |
| /* |
| * lm->rev.commits holds the set of boundary commits for our walk. |
| * |
| * Loop through each such commit, and place it in the appropriate queue. |
| */ |
| for (list = lm->rev.commits; list; list = list->next) { |
| struct commit *c = list->item; |
| |
| if (c->object.flags & BOTTOM) { |
| prio_queue_put(¬_queue, c); |
| c->object.flags |= PARENT2; |
| } else if (!(c->object.flags & PARENT1)) { |
| /* |
| * If the commit is a starting point (and hasn't been |
| * seen yet), then initialize the set of interesting |
| * paths, too. |
| */ |
| struct bitmap *active; |
| |
| prio_queue_put(&queue, c); |
| c->object.flags |= PARENT1; |
| |
| active = active_paths_for(lm, c); |
| for (size_t i = 0; i < lm->all_paths_nr; i++) |
| bitmap_set(active, i); |
| } |
| } |
| |
| while (queue.nr) { |
| int parent_i; |
| struct commit_list *p; |
| struct commit *c = prio_queue_get(&queue); |
| struct bitmap *active_c = active_paths_for(lm, c); |
| |
| if ((0 <= max_count && max_count < ++queue_popped) || |
| (c->object.flags & PARENT2)) { |
| /* |
| * Either a boundary commit, or we have already seen too |
| * many others. Either way, stop here. |
| */ |
| c->object.flags |= PARENT2 | BOUNDARY; |
| data.commit = c; |
| diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, |
| &c->object.oid, |
| "", &lm->rev.diffopt); |
| diff_flush(&lm->rev.diffopt); |
| goto cleanup; |
| } |
| |
| /* |
| * Otherwise, make sure that 'c' isn't reachable from anything |
| * in the '--not' queue. |
| */ |
| repo_parse_commit(lm->rev.repo, c); |
| |
| while (not_queue.nr) { |
| struct commit_list *np; |
| struct commit *n = prio_queue_get(¬_queue); |
| |
| repo_parse_commit(lm->rev.repo, n); |
| |
| for (np = n->parents; np; np = np->next) { |
| if (!(np->item->object.flags & PARENT2)) { |
| prio_queue_put(¬_queue, np->item); |
| np->item->object.flags |= PARENT2; |
| } |
| } |
| |
| if (commit_graph_generation(n) < commit_graph_generation(c)) |
| break; |
| } |
| |
| /* |
| * Look at each parent and pass on each path that's TREESAME |
| * with that parent. Stop early when no active paths remain. |
| */ |
| for (p = c->parents, parent_i = 0; p; p = p->next, parent_i++) { |
| process_parent(lm, &queue, |
| c, active_c, |
| p->item, parent_i); |
| |
| if (bitmap_is_empty(active_c)) |
| break; |
| } |
| |
| /* |
| * Paths that remain active, or not TREESAME with any parent, |
| * were changed by 'c'. |
| */ |
| if (!bitmap_is_empty(active_c)) { |
| data.commit = c; |
| for (size_t i = 0; i < lm->all_paths_nr; i++) { |
| if (bitmap_get(active_c, i)) |
| mark_path(lm->all_paths[i], NULL, &data); |
| } |
| } |
| |
| cleanup: |
| active_paths_free(lm, c); |
| } |
| |
| if (hashmap_get_size(&lm->paths)) |
| BUG("paths remaining beyond boundary in last-modified"); |
| |
| clear_prio_queue(¬_queue); |
| clear_prio_queue(&queue); |
| clear_active_paths_for_commit(&lm->active_paths); |
| bitmap_free(lm->scratch); |
| |
| return 0; |
| } |
| |
| static int last_modified_init(struct last_modified *lm, struct repository *r, |
| const char *prefix, int argc, const char **argv) |
| { |
| struct hashmap_iter iter; |
| struct last_modified_entry *ent; |
| |
| hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); |
| |
| repo_init_revisions(r, &lm->rev, prefix); |
| lm->rev.def = "HEAD"; |
| lm->rev.combine_merges = 1; |
| lm->rev.show_root_diff = 1; |
| lm->rev.boundary = 1; |
| lm->rev.no_commit_id = 1; |
| lm->rev.diff = 1; |
| lm->rev.diffopt.flags.no_recursive_diff_tree_combined = 1; |
| lm->rev.diffopt.flags.recursive = lm->recursive; |
| lm->rev.diffopt.flags.tree_in_recursive = lm->show_trees; |
| |
| argc = setup_revisions(argc, argv, &lm->rev, NULL); |
| if (argc > 1) { |
| error(_("unknown last-modified argument: %s"), argv[1]); |
| return argc; |
| } |
| |
| lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo); |
| |
| if (populate_paths_from_revs(lm) < 0) |
| return error(_("unable to setup last-modified")); |
| |
| CALLOC_ARRAY(lm->all_paths, hashmap_get_size(&lm->paths)); |
| lm->all_paths_nr = 0; |
| hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { |
| ent->diff_idx = lm->all_paths_nr++; |
| lm->all_paths[ent->diff_idx] = ent->path; |
| } |
| |
| return 0; |
| } |
| |
| int cmd_last_modified(int argc, const char **argv, const char *prefix, |
| struct repository *repo) |
| { |
| int ret; |
| struct last_modified lm = { 0 }; |
| |
| const char * const last_modified_usage[] = { |
| N_("git last-modified [--recursive] [--show-trees] " |
| "[<revision-range>] [[--] <path>...]"), |
| NULL |
| }; |
| |
| struct option last_modified_options[] = { |
| OPT_BOOL('r', "recursive", &lm.recursive, |
| N_("recurse into subtrees")), |
| OPT_BOOL('t', "show-trees", &lm.show_trees, |
| N_("show tree entries when recursing into subtrees")), |
| OPT_END() |
| }; |
| |
| argc = parse_options(argc, argv, prefix, last_modified_options, |
| last_modified_usage, |
| PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); |
| |
| repo_config(repo, git_default_config, NULL); |
| |
| ret = last_modified_init(&lm, repo, prefix, argc, argv); |
| if (ret > 0) |
| usage_with_options(last_modified_usage, |
| last_modified_options); |
| if (ret) |
| goto out; |
| |
| ret = last_modified_run(&lm); |
| if (ret) |
| goto out; |
| |
| out: |
| last_modified_release(&lm); |
| |
| return ret; |
| } |