diff options
Diffstat (limited to 'sparse-index.c')
-rw-r--r-- | sparse-index.c | 260 |
1 files changed, 203 insertions, 57 deletions
diff --git a/sparse-index.c b/sparse-index.c index 1fdb07a9e6..2107840bfc 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -1,5 +1,8 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "git-compat-util.h" #include "environment.h" +#include "ewah/ewok.h" #include "gettext.h" #include "name-hash.h" #include "read-cache-ll.h" @@ -12,6 +15,23 @@ #include "config.h" #include "dir.h" #include "fsmonitor-ll.h" +#include "advice.h" + +/** + * This global is used by expand_index() to determine if we should give the + * advice for advice.sparseIndexExpanded when expanding a sparse index to a full + * one. However, this is sometimes done on purpose, such as in the sparse-checkout + * builtin, even when index.sparse=false. This may be disabled in + * convert_to_sparse() or by commands that know they will lead to a full + * expansion, but this message is not actionable. + */ +int give_advice_on_expansion = 1; +#define ADVICE_MSG \ + "The sparse index is expanding to a full index, a slow operation.\n" \ + "Your working directory likely has contents that are outside of\n" \ + "your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \ + "see your sparse-checkout definition and compare it to your working\n" \ + "directory contents. Running 'git clean' may assist in this cleanup." struct modify_index_context { struct index_state *write; @@ -184,6 +204,12 @@ int convert_to_sparse(struct index_state *istate, int flags) return 0; /* + * If we are purposefully collapsing a full index, then don't give + * advice when it is expanded later. + */ + give_advice_on_expansion = 0; + + /* * NEEDSWORK: If we have unmerged entries, then stay full. * Unmerged entries prevent the cache-tree extension from working. */ @@ -217,7 +243,8 @@ int convert_to_sparse(struct index_state *istate, int flags) cache_tree_update(istate, 0); istate->fsmonitor_has_run_once = 0; - FREE_AND_NULL(istate->fsmonitor_dirty); + ewah_free(istate->fsmonitor_dirty); + istate->fsmonitor_dirty = NULL; FREE_AND_NULL(istate->fsmonitor_last_update); istate->sparse_index = INDEX_COLLAPSED; @@ -328,6 +355,12 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) pl = NULL; } + if (!pl && give_advice_on_expansion) { + give_advice_on_expansion = 0; + advise_if_enabled(ADVICE_SPARSE_INDEX_EXPANDED, + _(ADVICE_MSG)); + } + /* * A NULL pattern set indicates we are expanding a full index, so * we use a special region name that indicates the full expansion. @@ -391,7 +424,7 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) strbuf_setlen(&base, 0); strbuf_add(&base, ce->name, strlen(ce->name)); - read_tree_at(istate->repo, tree, &base, &ps, + read_tree_at(istate->repo, tree, &base, 0, &ps, add_path_to_index, &ctx); /* free directory entries. full entries are re-used */ @@ -407,7 +440,8 @@ void expand_index(struct index_state *istate, struct pattern_list *pl) istate->cache_nr = full->cache_nr; istate->cache_alloc = full->cache_alloc; istate->fsmonitor_has_run_once = 0; - FREE_AND_NULL(istate->fsmonitor_dirty); + ewah_free(istate->fsmonitor_dirty); + istate->fsmonitor_dirty = NULL; FREE_AND_NULL(istate->fsmonitor_last_update); strbuf_release(&base); @@ -439,96 +473,208 @@ void ensure_correct_sparsity(struct index_state *istate) ensure_full_index(istate); } -static int path_found(const char *path, const char **dirname, size_t *dir_len, - int *dir_found) +struct path_found_data { + /** + * The path stored in 'dir', if non-empty, corresponds to the most- + * recent path that we checked where: + * + * 1. The path should be a directory, according to the index. + * 2. The path does not exist. + * 3. The parent path _does_ exist. (This may be the root of the + * working directory.) + */ + struct strbuf dir; + size_t lstat_count; +}; + +#define PATH_FOUND_DATA_INIT { \ + .dir = STRBUF_INIT \ +} + +static void clear_path_found_data(struct path_found_data *data) +{ + strbuf_release(&data->dir); +} + +/** + * Return the length of the longest common substring that ends in a + * slash ('/') to indicate the longest common parent directory. Returns + * zero if no common directory exists. + */ +static size_t max_common_dir_prefix(const char *path1, const char *path2) +{ + size_t common_prefix = 0; + for (size_t i = 0; path1[i] && path2[i]; i++) { + if (path1[i] != path2[i]) + break; + + /* + * If they agree at a directory separator, then add one + * to make sure it is included in the common prefix string. + */ + if (path1[i] == '/') + common_prefix = i + 1; + } + + return common_prefix; +} + +static int path_found(const char *path, struct path_found_data *data) { struct stat st; - char *newdir; - char *tmp; + size_t common_prefix; /* - * If dirname corresponds to a directory that doesn't exist, and this - * path starts with dirname, then path can't exist. + * If data->dir is non-empty, then it contains a path that doesn't + * exist, including an ending slash ('/'). If it is a prefix of 'path', + * then we can return 0. */ - if (!*dir_found && !memcmp(path, *dirname, *dir_len)) + if (data->dir.len && !memcmp(path, data->dir.buf, data->dir.len)) return 0; /* - * If path itself exists, return 1. + * Otherwise, we must check if the current path exists. If it does, then + * return 1. The cached directory will be skipped until we come across + * a missing path again. */ + data->lstat_count++; if (!lstat(path, &st)) return 1; /* - * Otherwise, path does not exist so we'll return 0...but we'll first - * determine some info about its parent directory so we can avoid - * lstat calls for future cache entries. + * At this point, we know that 'path' doesn't exist, and we know that + * the parent directory of 'data->dir' does exist. Let's set 'data->dir' + * to be the top-most non-existing directory of 'path'. If the first + * parent of 'path' exists, then we will act as though 'path' + * corresponds to a directory (by adding a slash). */ - newdir = strrchr(path, '/'); - if (!newdir) - return 0; /* Didn't find a parent dir; just return 0 now. */ + common_prefix = max_common_dir_prefix(path, data->dir.buf); /* - * If path starts with directory (which we already lstat'ed and found), - * then no need to lstat parent directory again. + * At this point, 'path' and 'data->dir' have a common existing parent + * directory given by path[0..common_prefix] (which could have length 0). + * We "grow" the data->dir buffer by checking for existing directories + * along 'path'. */ - if (*dir_found && *dirname && memcmp(path, *dirname, *dir_len)) - return 0; - /* Free previous dirname, and cache path's dirname */ - *dirname = path; - *dir_len = newdir - path + 1; + strbuf_setlen(&data->dir, common_prefix); + while (1) { + /* Find the next directory in 'path'. */ + const char *rest = path + data->dir.len; + const char *next_slash = strchr(rest, '/'); + + /* + * If there are no more slashes, then 'path' doesn't contain a + * non-existent _parent_ directory. Set 'data->dir' to be equal + * to 'path' plus an additional slash, so it can be used for + * caching in the future. The filename of 'path' is considered + * a non-existent directory. + * + * Note: if "{path}/" exists as a directory, then it will never + * appear as a prefix of other callers to this method, assuming + * the context from the clear_skip_worktree... methods. If this + * method is reused, then this must be reconsidered. + */ + if (!next_slash) { + strbuf_addstr(&data->dir, rest); + strbuf_addch(&data->dir, '/'); + break; + } - tmp = xstrndup(path, *dir_len); - *dir_found = !lstat(tmp, &st); - free(tmp); + /* + * Now that we have a slash, let's grow 'data->dir' to include + * this slash, then test if we should stop. + */ + strbuf_add(&data->dir, rest, next_slash - rest + 1); + + /* If the parent dir doesn't exist, then stop here. */ + data->lstat_count++; + if (lstat(data->dir.buf, &st)) + return 0; + } + /* + * At this point, 'data->dir' is equal to 'path' plus a slash character, + * and the parent directory of 'path' definitely exists. Moreover, we + * know that 'path' doesn't exist, or we would have returned 1 earlier. + */ return 0; } -void clear_skip_worktree_from_present_files(struct index_state *istate) +static int clear_skip_worktree_from_present_files_sparse(struct index_state *istate) { - const char *last_dirname = NULL; - size_t dir_len = 0; - int dir_found = 1; - - int i; - int path_count[2] = {0, 0}; - int restarted = 0; + struct path_found_data data = PATH_FOUND_DATA_INIT; - if (!core_apply_sparse_checkout || - sparse_expect_files_outside_of_patterns) - return; + int path_count = 0; + int to_restart = 0; - trace2_region_enter("index", "clear_skip_worktree_from_present_files", + trace2_region_enter("index", "clear_skip_worktree_from_present_files_sparse", istate->repo); -restart: - for (i = 0; i < istate->cache_nr; i++) { + for (int i = 0; i < istate->cache_nr; i++) { struct cache_entry *ce = istate->cache[i]; if (ce_skip_worktree(ce)) { - path_count[restarted]++; - if (path_found(ce->name, &last_dirname, &dir_len, &dir_found)) { + path_count++; + if (path_found(ce->name, &data)) { if (S_ISSPARSEDIR(ce->ce_mode)) { - if (restarted) - BUG("ensure-full-index did not fully flatten?"); - ensure_full_index(istate); - restarted = 1; - goto restart; + to_restart = 1; + break; } ce->ce_flags &= ~CE_SKIP_WORKTREE; } } } - if (path_count[0]) - trace2_data_intmax("index", istate->repo, - "sparse_path_count", path_count[0]); - if (restarted) - trace2_data_intmax("index", istate->repo, - "sparse_path_count_full", path_count[1]); - trace2_region_leave("index", "clear_skip_worktree_from_present_files", + trace2_data_intmax("index", istate->repo, + "sparse_path_count", path_count); + trace2_data_intmax("index", istate->repo, + "sparse_lstat_count", data.lstat_count); + trace2_region_leave("index", "clear_skip_worktree_from_present_files_sparse", istate->repo); + clear_path_found_data(&data); + return to_restart; +} + +static void clear_skip_worktree_from_present_files_full(struct index_state *istate) +{ + struct path_found_data data = PATH_FOUND_DATA_INIT; + + int path_count = 0; + + trace2_region_enter("index", "clear_skip_worktree_from_present_files_full", + istate->repo); + for (int i = 0; i < istate->cache_nr; i++) { + struct cache_entry *ce = istate->cache[i]; + + if (S_ISSPARSEDIR(ce->ce_mode)) + BUG("ensure-full-index did not fully flatten?"); + + if (ce_skip_worktree(ce)) { + path_count++; + if (path_found(ce->name, &data)) + ce->ce_flags &= ~CE_SKIP_WORKTREE; + } + } + + trace2_data_intmax("index", istate->repo, + "full_path_count", path_count); + trace2_data_intmax("index", istate->repo, + "full_lstat_count", data.lstat_count); + trace2_region_leave("index", "clear_skip_worktree_from_present_files_full", + istate->repo); + clear_path_found_data(&data); +} + +void clear_skip_worktree_from_present_files(struct index_state *istate) +{ + if (!core_apply_sparse_checkout || + sparse_expect_files_outside_of_patterns) + return; + + if (clear_skip_worktree_from_present_files_sparse(istate)) { + ensure_full_index(istate); + clear_skip_worktree_from_present_files_full(istate); + } } /* @@ -579,8 +725,9 @@ void expand_to_path(struct index_state *istate, replace++; temp = *replace; *replace = '\0'; + substr_len = replace - path_mutable.buf; if (index_file_exists(istate, path_mutable.buf, - path_mutable.len, icase)) { + substr_len, icase)) { /* * We found a parent directory in the name-hash * hashtable, because only sparse directory entries @@ -593,7 +740,6 @@ void expand_to_path(struct index_state *istate, } *replace = temp; - substr_len = replace - path_mutable.buf; } cleanup: |