From 658dd48c8572d0db49719cbef6605d384621d87c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2009 14:57:30 -0700 Subject: Avoid unnecessary 'lstat()' calls in 'get_stat_data()' When we ask get_stat_data() to get the mode and size of an index entry, we can avoid the lstat() call if we have marked the index entry as being uptodate due to earlier lstat() calls. This avoids a lot of unnecessary lstat() calls in eg 'git checkout', where the last phase shows the differences to the working tree (requiring a diff), but earlier phases have already verified the index. On the kernel repo (with a fast machine and everything cached), this changes timings of a nul 'git checkout' from - Before (best of ten): 0.14user 0.05system 0:00.19elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+13237minor)pagefaults 0swaps - After 0.11user 0.03system 0:00.15elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+13235minor)pagefaults 0swaps so it can obviously be noticeable, although equally obviously it's not a show-stopper on this particular machine. The difference is likely larger on slower machines, or with operating systems that don't do as good a job of name caching. 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index ae96c64ca2..d230efc146 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -222,7 +222,7 @@ static int get_stat_data(struct cache_entry *ce, const unsigned char *sha1 = ce->sha1; unsigned int mode = ce->ce_mode; - if (!cached) { + if (!cached && !ce_uptodate(ce)) { int changed; struct stat st; changed = check_removed(ce, &st); -- cgit v1.2.3 From b65982b60876c8f5f4d3b2898d5174f4812552b1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 20 May 2009 15:57:22 -0700 Subject: Optimize "diff-index --cached" using cache-tree When running "diff-index --cached" after making a change to only a small portion of the index, there is no point unpacking unchanged subtrees into the index recursively, only to find that all entries match anyway. Tweak unpack_trees() logic that is used to read in the tree object to catch the case where the tree entry we are looking at matches the index as a whole by looking at the cache-tree. As an exercise, after modifying a few paths in the kernel tree, here are a few numbers on my Athlon 64X2 3800+: (without patch, hot cache) $ /usr/bin/time git diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+9407minor)pagefaults 0swaps (with patch, hot cache) $ /usr/bin/time ../git.git/git-diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... 
A arche 0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+2446minor)pagefaults 0swaps Cold cache numbers are very impressive, but it does not matter very much in practice: (without patch, cold cache) $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches' $ /usr/bin/time git diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k 247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps (with patch, cold cache) $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches' $ /usr/bin/time ../git.git/git-diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k 18440inputs+0outputs (79major+2369minor)pagefaults 0swaps This of course helps "git status" as well. 
(without patch, hot cache) $ /usr/bin/time ../git.git/git-status >/dev/null 0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+5336outputs (0major+10970minor)pagefaults 0swaps (with patch, hot cache) $ /usr/bin/time ../git.git/git-status >/dev/null 0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+5336outputs (0major+3921minor)pagefaults 0swaps Signed-off-by: Junio C Hamano --- cache-tree.c | 32 ++++++++++++++++++++++++++++++++ cache-tree.h | 3 +++ diff-lib.c | 2 ++ unpack-trees.c | 17 +++++++++++++++++ unpack-trees.h | 1 + 5 files changed, 55 insertions(+) (limited to 'diff-lib.c') diff --git a/cache-tree.c b/cache-tree.c index 5481e43340..16a65dfac1 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -631,3 +631,35 @@ void prime_cache_tree(struct cache_tree **it, struct tree *tree) *it = cache_tree(); prime_cache_tree_rec(*it, tree); } + +/* + * find the cache_tree that corresponds to the current level without + * exploding the full path into textual form. The root of the + * cache tree is given as "root", and our current level is "info". + * (1) When at root level, info->prev is NULL, so it is "root" itself. + * (2) Otherwise, find the cache_tree that corresponds to one level + * above us, and find ourselves in there. 
+ */ +static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root, + struct traverse_info *info) +{ + struct cache_tree *our_parent; + + if (!info->prev) + return root; + our_parent = find_cache_tree_from_traversal(root, info->prev); + return cache_tree_find(our_parent, info->name.path); +} + +int cache_tree_matches_traversal(struct cache_tree *root, + struct name_entry *ent, + struct traverse_info *info) +{ + struct cache_tree *it; + + it = find_cache_tree_from_traversal(root, info); + it = cache_tree_find(it, ent->path); + if (it && it->entry_count > 0 && !hashcmp(ent->sha1, it->sha1)) + return it->entry_count; + return 0; +} diff --git a/cache-tree.h b/cache-tree.h index eadcad8adc..3df641f593 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -2,6 +2,7 @@ #define CACHE_TREE_H #include "tree.h" +#include "tree-walk.h" struct cache_tree; struct cache_tree_sub { @@ -42,4 +43,6 @@ int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int, int) int write_cache_as_tree(unsigned char *sha1, int flags, const char *prefix); void prime_cache_tree(struct cache_tree **, struct tree *); +extern int cache_tree_matches_traversal(struct cache_tree *, struct name_entry *ent, struct traverse_info *info); + #endif diff --git a/diff-lib.c b/diff-lib.c index a310fb2ad0..1cb97af22d 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -446,6 +446,7 @@ int run_diff_index(struct rev_info *revs, int cached) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = cached; + opts.diff_index_cached = cached; opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = revs; @@ -502,6 +503,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = 1; + opts.diff_index_cached = 1; opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = &revs; diff --git a/unpack-trees.c b/unpack-trees.c index aaacaf1015..8eb3ddb392 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ 
-326,6 +326,23 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str if (src[0]) conflicts |= 1; } + + /* special case: "diff-index --cached" looking at a tree */ + if (o->diff_index_cached && + n == 1 && dirmask == 1 && S_ISDIR(names->mode)) { + int matches; + matches = cache_tree_matches_traversal(o->src_index->cache_tree, + names, info); + /* + * Everything under the name matches. Adjust o->pos to + * skip the entire hierarchy. + */ + if (matches) { + o->pos += matches; + return mask; + } + } + if (traverse_trees_recursive(n, dirmask, conflicts, names, info) < 0) return -1; diff --git a/unpack-trees.h b/unpack-trees.h index 0d26f3d73e..1e0e2325f1 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -27,6 +27,7 @@ struct unpack_trees_options { aggressive:1, skip_unmerged:1, initial_checkout:1, + diff_index_cached:1, gently:1; const char *prefix; int pos; -- cgit v1.2.3 From a0919ced8a5efe938cf97c74a0f851cbbe00aaf6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 22 May 2009 23:14:25 -0700 Subject: Avoid "diff-index --cached" optimization under --find-copies-harder When find-copies-harder is in effect, the diff frontends are expected to feed all paths, not just changed paths, to the diffcore, so that copy sources can be picked up. In such a case, not descending into subtrees using the cache-tree information is simply wrong. 
Signed-off-by: Junio C Hamano --- diff-lib.c | 5 +++-- t/t4007-rename-3.sh | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 1cb97af22d..ae75eacbcc 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -446,7 +446,8 @@ int run_diff_index(struct rev_info *revs, int cached) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = cached; - opts.diff_index_cached = cached; + opts.diff_index_cached = (cached && + !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)); opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = revs; @@ -503,7 +504,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = 1; - opts.diff_index_cached = 1; + opts.diff_index_cached = !DIFF_OPT_TST(opt, FIND_COPIES_HARDER); opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = &revs; diff --git a/t/t4007-rename-3.sh b/t/t4007-rename-3.sh index 25e7a83659..11502b7509 100755 --- a/t/t4007-rename-3.sh +++ b/t/t4007-rename-3.sh @@ -35,6 +35,11 @@ test_expect_success 'copy detection' ' compare_diff_raw current expected ' +test_expect_success 'copy detection, cached' ' + git diff-index -C --find-copies-harder --cached $tree >current && + compare_diff_raw current expected +' + # In the tree, there is only path0/COPYING. In the cache, path0 and # path1 both have COPYING and the latter is a copy of path0/COPYING. # However when we say we care only about path1, we should just see -- cgit v1.2.3 From 90b1994170900514a1ce7a3345e25cb7216915cc Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 23 May 2009 01:15:35 -0700 Subject: diff: Rename QUIET internal option to QUICK The option "QUIET" primarily meant "find if we have _any_ difference as quick as possible and report", which means we often do not even have to look at blobs if we know the trees are different by looking at the higher level (e.g. "diff-tree A B"). 
As a side effect, because there is no point showing one change that we happened to have found first, it also enables NO_OUTPUT and EXIT_WITH_STATUS options, making the end result look quiet. Rename the internal option to QUICK to reflect this better; it also makes grepping the source tree much easier, as there are other kinds of QUIET option everywhere. Signed-off-by: Junio C Hamano --- builtin-log.c | 2 +- builtin-rev-list.c | 2 +- diff-lib.c | 4 ++-- diff.c | 4 ++-- diff.h | 2 +- revision.c | 2 +- tree-diff.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) (limited to 'diff-lib.c') diff --git a/builtin-log.c b/builtin-log.c index 0c2fa0ae2d..7903e5a78f 100644 --- a/builtin-log.c +++ b/builtin-log.c @@ -537,7 +537,7 @@ static int reopen_stdout(struct commit *commit, struct rev_info *rev) get_patch_filename(commit, rev->nr, fmt_patch_suffix, &filename); - if (!DIFF_OPT_TST(&rev->diffopt, QUIET)) + if (!DIFF_OPT_TST(&rev->diffopt, QUICK)) fprintf(realstdout, "%s\n", filename.buf + outdir_offset); if (freopen(filename.buf, "w", stdout) == NULL) diff --git a/builtin-rev-list.c b/builtin-rev-list.c index 4ba1c12e0b..69753dc206 100644 --- a/builtin-rev-list.c +++ b/builtin-rev-list.c @@ -320,7 +320,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) memset(&info, 0, sizeof(info)); info.revs = &revs; - quiet = DIFF_OPT_TST(&revs.diffopt, QUIET); + quiet = DIFF_OPT_TST(&revs.diffopt, QUICK); for (i = 1 ; i < argc; i++) { const char *arg = argv[i]; diff --git a/diff-lib.c b/diff-lib.c index ad2a4cde74..b7813af614 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -73,7 +73,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) struct cache_entry *ce = active_cache[i]; int changed; - if (DIFF_OPT_TST(&revs->diffopt, QUIET) && + if (DIFF_OPT_TST(&revs->diffopt, QUICK) && DIFF_OPT_TST(&revs->diffopt, HAS_CHANGES)) break; @@ -523,7 +523,7 @@ int index_differs_from(const char *def, int diff_flags) init_revisions(&rev, NULL); setup_revisions(0, 
NULL, &rev, def); - DIFF_OPT_SET(&rev.diffopt, QUIET); + DIFF_OPT_SET(&rev.diffopt, QUICK); DIFF_OPT_SET(&rev.diffopt, EXIT_WITH_STATUS); rev.diffopt.flags |= diff_flags; run_diff_index(&rev, 1); diff --git a/diff.c b/diff.c index 467925d931..91d6ea21a9 100644 --- a/diff.c +++ b/diff.c @@ -2452,7 +2452,7 @@ int diff_setup_done(struct diff_options *options) * to have found. It does not make sense not to return with * exit code in such a case either. */ - if (DIFF_OPT_TST(options, QUIET)) { + if (DIFF_OPT_TST(options, QUICK)) { options->output_format = DIFF_FORMAT_NO_OUTPUT; DIFF_OPT_SET(options, EXIT_WITH_STATUS); } @@ -2643,7 +2643,7 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) else if (!strcmp(arg, "--exit-code")) DIFF_OPT_SET(options, EXIT_WITH_STATUS); else if (!strcmp(arg, "--quiet")) - DIFF_OPT_SET(options, QUIET); + DIFF_OPT_SET(options, QUICK); else if (!strcmp(arg, "--ext-diff")) DIFF_OPT_SET(options, ALLOW_EXTERNAL); else if (!strcmp(arg, "--no-ext-diff")) diff --git a/diff.h b/diff.h index 538e4f0d8f..a7e7ccbd42 100644 --- a/diff.h +++ b/diff.h @@ -55,7 +55,7 @@ typedef void (*diff_format_fn_t)(struct diff_queue_struct *q, #define DIFF_OPT_COLOR_DIFF (1 << 8) #define DIFF_OPT_COLOR_DIFF_WORDS (1 << 9) #define DIFF_OPT_HAS_CHANGES (1 << 10) -#define DIFF_OPT_QUIET (1 << 11) +#define DIFF_OPT_QUICK (1 << 11) #define DIFF_OPT_NO_INDEX (1 << 12) #define DIFF_OPT_ALLOW_EXTERNAL (1 << 13) #define DIFF_OPT_EXIT_WITH_STATUS (1 << 14) diff --git a/revision.c b/revision.c index 9f5dac5f1d..b8afc7c2b5 100644 --- a/revision.c +++ b/revision.c @@ -791,7 +791,7 @@ void init_revisions(struct rev_info *revs, const char *prefix) revs->ignore_merges = 1; revs->simplify_history = 1; DIFF_OPT_SET(&revs->pruning, RECURSIVE); - DIFF_OPT_SET(&revs->pruning, QUIET); + DIFF_OPT_SET(&revs->pruning, QUICK); revs->pruning.add_remove = file_add_remove; revs->pruning.change = file_change; revs->lifo = 1; diff --git a/tree-diff.c b/tree-diff.c index 
7c526d33f4..7d745b4406 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -286,7 +286,7 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, const char *base, stru int baselen = strlen(base); for (;;) { - if (DIFF_OPT_TST(opt, QUIET) && + if (DIFF_OPT_TST(opt, QUICK) && DIFF_OPT_TST(opt, HAS_CHANGES)) break; if (opt->nr_paths) { -- cgit v1.2.3 From 29796c6ccff3e70622398379fdcdfa3fe43333ac Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 4 Aug 2009 16:25:40 -0700 Subject: diff-index: report unmerged new entries Since an earlier change to diff-index by d1f2d7e (Make run_diff_index() use unpack_trees(), not read_tree(), 2008-01-19), we stopped reporting an unmerged path that does not exist in the tree, but we should. Signed-off-by: Junio C Hamano --- diff-lib.c | 4 ++-- t/t7060-wtstatus.sh | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100755 t/t7060-wtstatus.sh (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index ad2a4cde74..ad5b6cac7b 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -348,8 +348,8 @@ static void do_oneway_diff(struct unpack_trees_options *o, match_missing = !revs->ignore_merges; if (cached && idx && ce_stage(idx)) { - if (tree) - diff_unmerge(&revs->diffopt, idx->name, idx->ce_mode, idx->sha1); + diff_unmerge(&revs->diffopt, idx->name, idx->ce_mode, + idx->sha1); return; } diff --git a/t/t7060-wtstatus.sh b/t/t7060-wtstatus.sh new file mode 100755 index 0000000000..5ad2cd1d04 --- /dev/null +++ b/t/t7060-wtstatus.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +test_description='basic work tree status reporting' + +. 
./test-lib.sh + +test_expect_success setup ' + test_commit A && + test_commit B oneside added && + git checkout A^0 && + test_commit C oneside created +' + +test_expect_success 'A/A conflict' ' + git checkout B^0 && + test_must_fail git merge C +' + +test_expect_success 'Report path with conflict' ' + git diff --cached --name-status >actual && + echo "U oneside" >expect && + test_cmp expect actual +' + +test_expect_success 'Report new path with conflict' ' + git diff --cached --name-status HEAD^ >actual && + echo "U oneside" >expect && + test_cmp expect actual +' + +test_done -- cgit v1.2.3 From 26da1d78674204c482ec90905dd4de3f6bcd3c5f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 4 Aug 2009 22:08:16 -0700 Subject: diff-index: keep the original index intact When comparing the index and a tree, we used to read the contents of the tree into stage #1 of the index and compared them with stage #0. In order not to lose sight of entries originally unmerged in the index, we hoisted them to stage #3 before reading the tree. Commit d1f2d7e (Make run_diff_index() use unpack_trees(), not read_tree(), 2008-01-19) changed all this. These days, we instead use unpack_trees() API to traverse the tree and compare the contents with the index, without modifying the index at all. There is no reason to hoist the unmerged entries to stage #3 anymore. Signed-off-by: Junio C Hamano --- diff-lib.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index ad5b6cac7b..2a82dac101 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -308,22 +308,6 @@ static int show_modified(struct rev_info *revs, return 0; } -/* - * This turns all merge entries into "stage 3". That guarantees that - * when we read in the new tree (into "stage 1"), we won't lose sight - * of the fact that we had unmerged entries. 
- */ -static void mark_merge_entries(void) -{ - int i; - for (i = 0; i < active_nr; i++) { - struct cache_entry *ce = active_cache[i]; - if (!ce_stage(ce)) - continue; - ce->ce_flags |= CE_STAGEMASK; - } -} - /* * This gets a mix of an existing index and a tree, one pathname entry * at a time. The index entry may be a single stage-0 one, but it could @@ -435,8 +419,6 @@ int run_diff_index(struct rev_info *revs, int cached) struct unpack_trees_options opts; struct tree_desc t; - mark_merge_entries(); - ent = revs->pending.objects[0].item; tree_name = revs->pending.objects[0].name; tree = parse_tree_indirect(ent->sha1); -- cgit v1.2.3 From 540e694b139dd034b21de087001ac9b6d7606c94 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Tue, 11 Aug 2009 22:43:59 +0700 Subject: Prevent diff machinery from examining assume-unchanged entries on worktree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- diff-lib.c | 6 ++++-- t/t4039-diff-assume-unchanged.sh | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) create mode 100755 t/t4039-diff-assume-unchanged.sh (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index ad2a4cde74..22da66ef14 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -162,7 +162,8 @@ int run_diff_files(struct rev_info *revs, unsigned int option) if (ce_uptodate(ce)) continue; - changed = check_removed(ce, &st); + /* If CE_VALID is set, don't look at workdir for file removal */ + changed = (ce->ce_flags & CE_VALID) ? 
0 : check_removed(ce, &st); if (changed) { if (changed < 0) { perror(ce->name); @@ -337,6 +338,8 @@ static void do_oneway_diff(struct unpack_trees_options *o, struct rev_info *revs = o->unpack_data; int match_missing, cached; + /* if the entry is not checked out, don't examine work tree */ + cached = o->index_only || (idx && (idx->ce_flags & CE_VALID)); /* * Backward compatibility wart - "diff-index -m" does * not mean "do not ignore merges", but "match_missing". @@ -344,7 +347,6 @@ static void do_oneway_diff(struct unpack_trees_options *o, * But with the revision flag parsing, that's found in * "!revs->ignore_merges". */ - cached = o->index_only; match_missing = !revs->ignore_merges; if (cached && idx && ce_stage(idx)) { diff --git a/t/t4039-diff-assume-unchanged.sh b/t/t4039-diff-assume-unchanged.sh new file mode 100755 index 0000000000..9d9498bd95 --- /dev/null +++ b/t/t4039-diff-assume-unchanged.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +test_description='diff with assume-unchanged entries' + +. 
./test-lib.sh + +# external diff has been tested in t4020-diff-external.sh + +test_expect_success 'setup' ' + echo zero > zero && + git add zero && + git commit -m zero && + echo one > one && + echo two > two && + git add one two && + git commit -m onetwo && + git update-index --assume-unchanged one && + echo borked >> one && + test "$(git ls-files -v one)" = "h one" +' + +test_expect_success 'diff-index does not examine assume-unchanged entries' ' + git diff-index HEAD^ -- one | grep -q 5626abf0f72e58d7a153368ba57db4c673c0e171 +' + +test_expect_success 'diff-files does not examine assume-unchanged entries' ' + rm one && + test -z "$(git diff-files -- one)" +' + +test_done -- cgit v1.2.3 From b4d1690df11ae6ce382b93778616b1a20f1774ff Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Thu, 20 Aug 2009 20:46:58 +0700 Subject: Teach Git to respect skip-worktree bit (reading part) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit grep: turn on --cached for files that are marked skip-worktree ls-files: do not check for deleted files that are marked skip-worktree update-index: ignore update request if it's skip-worktree, while still allowing removal diff*: skip worktree version Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin-commit.c | 5 ++ builtin-grep.c | 2 +- builtin-ls-files.c | 2 + builtin-update-index.c | 38 +++++---- diff-lib.c | 5 +- diff.c | 2 +- read-cache.c | 8 +- t/t7011-skip-worktree-reading.sh | 163 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 199 insertions(+), 26 deletions(-) create mode 100755 t/t7011-skip-worktree-reading.sh (limited to 'diff-lib.c') diff --git a/builtin-commit.c b/builtin-commit.c index 4bcce06fbf..a0b1fd35cb 100644 --- a/builtin-commit.c +++ b/builtin-commit.c @@ -180,6 +180,11 @@ static void add_remove_files(struct string_list *list) for (i = 0; i < list->nr; i++) { struct stat st; struct string_list_item *p = &(list->items[i]); + int pos = 
index_name_pos(&the_index, p->string, strlen(p->string)); + struct cache_entry *ce = pos < 0 ? NULL : active_cache[pos]; + + if (ce && ce_skip_worktree(ce)) + continue; if (!lstat(p->string, &st)) { if (add_to_cache(p->string, &st, 0)) diff --git a/builtin-grep.c b/builtin-grep.c index ad0e0a5385..813fe9778a 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -517,7 +517,7 @@ static int grep_cache(struct grep_opt *opt, const char **paths, int cached, * are identical, even if worktree file has been modified, so use * cache version instead */ - if (cached || (ce->ce_flags & CE_VALID)) { + if (cached || (ce->ce_flags & CE_VALID) || ce_skip_worktree(ce)) { if (ce_stage(ce)) continue; hit |= grep_sha1(opt, ce->sha1, ce->name, 0); diff --git a/builtin-ls-files.c b/builtin-ls-files.c index c1afbad453..ad7e44784f 100644 --- a/builtin-ls-files.c +++ b/builtin-ls-files.c @@ -194,6 +194,8 @@ static void show_files(struct dir_struct *dir, const char *prefix) continue; if (ce->ce_flags & CE_UPDATE) continue; + if (ce_skip_worktree(ce)) + continue; err = lstat(ce->name, &st); if (show_deleted && err) show_ce_entry(tag_removed, ce); diff --git a/builtin-update-index.c b/builtin-update-index.c index 5e97d09497..97b9ea61f7 100644 --- a/builtin-update-index.c +++ b/builtin-update-index.c @@ -172,29 +172,29 @@ static int process_directory(const char *path, int len, struct stat *st) return error("%s: is a directory - add files inside instead", path); } -/* - * Process a regular file - */ -static int process_file(const char *path, int len, struct stat *st) -{ - int pos = cache_name_pos(path, len); - struct cache_entry *ce = pos < 0 ? 
NULL : active_cache[pos]; - - if (ce && S_ISGITLINK(ce->ce_mode)) - return error("%s is already a gitlink, not replacing", path); - - return add_one_path(ce, path, len, st); -} - static int process_path(const char *path) { - int len; + int pos, len; struct stat st; + struct cache_entry *ce; len = strlen(path); if (has_symlink_leading_path(path, len)) return error("'%s' is beyond a symbolic link", path); + pos = cache_name_pos(path, len); + ce = pos < 0 ? NULL : active_cache[pos]; + if (ce && ce_skip_worktree(ce)) { + /* + * working directory version is assumed "good" + * so updating it does not make sense. + * On the other hand, removing it from index should work + */ + if (allow_remove && remove_file_from_cache(path)) + return error("%s: cannot remove from the index", path); + return 0; + } + /* * First things first: get the stat information, to decide * what to do about the pathname! @@ -205,7 +205,13 @@ static int process_path(const char *path) if (S_ISDIR(st.st_mode)) return process_directory(path, len, &st); - return process_file(path, len, &st); + /* + * Process a regular file + */ + if (ce && S_ISGITLINK(ce->ce_mode)) + return error("%s is already a gitlink, not replacing", path); + + return add_one_path(ce, path, len, &st); } static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, diff --git a/diff-lib.c b/diff-lib.c index 22da66ef14..b0b379d9d2 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -159,7 +159,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) continue; } - if (ce_uptodate(ce)) + if (ce_uptodate(ce) || ce_skip_worktree(ce)) continue; /* If CE_VALID is set, don't look at workdir for file removal */ @@ -339,7 +339,8 @@ static void do_oneway_diff(struct unpack_trees_options *o, int match_missing, cached; /* if the entry is not checked out, don't examine work tree */ - cached = o->index_only || (idx && (idx->ce_flags & CE_VALID)); + cached = o->index_only || + (idx && ((idx->ce_flags & CE_VALID) || ce_skip_worktree(idx))); 
/* * Backward compatibility wart - "diff-index -m" does * not mean "do not ignore merges", but "match_missing". diff --git a/diff.c b/diff.c index cd35e0c2d7..3970df4afc 100644 --- a/diff.c +++ b/diff.c @@ -1805,7 +1805,7 @@ static int reuse_worktree_file(const char *name, const unsigned char *sha1, int * If ce is marked as "assume unchanged", there is no * guarantee that work tree matches what we are looking for. */ - if (ce->ce_flags & CE_VALID) + if ((ce->ce_flags & CE_VALID) || ce_skip_worktree(ce)) return 0; /* diff --git a/read-cache.c b/read-cache.c index 4e3e272ee4..5ee7d9da9c 100644 --- a/read-cache.c +++ b/read-cache.c @@ -265,7 +265,7 @@ int ie_match_stat(const struct index_state *istate, * If it's marked as always valid in the index, it's * valid whatever the checked-out copy says. */ - if (!ignore_valid && (ce->ce_flags & CE_VALID)) + if (!ignore_valid && ((ce->ce_flags & CE_VALID) || ce_skip_worktree(ce))) return 0; /* @@ -1004,11 +1004,7 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, if (ce_uptodate(ce)) return ce; - /* - * CE_VALID means the user promised us that the change to - * the work tree does not matter and told us not to worry. - */ - if (!ignore_valid && (ce->ce_flags & CE_VALID)) { + if (!ignore_valid && ((ce->ce_flags & CE_VALID) || ce_skip_worktree(ce))) { ce_mark_uptodate(ce); return ce; } diff --git a/t/t7011-skip-worktree-reading.sh b/t/t7011-skip-worktree-reading.sh new file mode 100755 index 0000000000..e996928de2 --- /dev/null +++ b/t/t7011-skip-worktree-reading.sh @@ -0,0 +1,163 @@ +#!/bin/sh +# +# Copyright (c) 2008 Nguyễn Thái Ngọc Duy +# + +test_description='skip-worktree bit test' + +. ./test-lib.sh + +cat >expect.full <expect.skip < expected && + git ls-files --stage 1 > result && + test_cmp expected result && + test ! 
-f 1 +} + +setup_dirty() { + git update-index --force-remove 1 && + echo dirty > 1 && + git update-index --add --cacheinfo 100644 $NULL_SHA1 1 && + git update-index --skip-worktree 1 +} + +test_dirty() { + echo "100644 $NULL_SHA1 0 1" > expected && + git ls-files --stage 1 > result && + test_cmp expected result && + echo dirty > expected + test_cmp expected 1 +} + +test_expect_success 'setup' ' + test_commit init && + mkdir sub && + touch ./1 ./2 sub/1 sub/2 && + git add 1 2 sub/1 sub/2 && + git update-index --skip-worktree 1 sub/1 && + git ls-files -t > result && + test_cmp expect.skip result +' + +test_expect_success 'update-index' ' + setup_absent && + git update-index 1 && + test_absent +' + +test_expect_success 'update-index' ' + setup_dirty && + git update-index 1 && + test_dirty +' + +test_expect_success 'update-index --remove' ' + setup_absent && + git update-index --remove 1 && + test -z "$(git ls-files 1)" && + test ! -f 1 +' + +test_expect_success 'update-index --remove' ' + setup_dirty && + git update-index --remove 1 && + test -z "$(git ls-files 1)" && + echo dirty > expected && + test_cmp expected 1 +' + +test_expect_success 'ls-files --delete' ' + setup_absent && + test -z "$(git ls-files -d)" +' + +test_expect_success 'ls-files --delete' ' + setup_dirty && + test -z "$(git ls-files -d)" +' + +test_expect_success 'ls-files --modified' ' + setup_absent && + test -z "$(git ls-files -m)" +' + +test_expect_success 'ls-files --modified' ' + setup_dirty && + test -z "$(git ls-files -m)" +' + +test_expect_success 'grep with skip-worktree file' ' + git update-index --no-skip-worktree 1 && + echo test > 1 && + git update-index 1 && + git update-index --skip-worktree 1 && + rm 1 && + test "$(git grep --no-ext-grep test)" = "1:test" +' + +echo ":000000 100644 $ZERO_SHA0 $NULL_SHA1 A 1" > expected +test_expect_success 'diff-index does not examine skip-worktree absent entries' ' + setup_absent && + git diff-index HEAD -- 1 > result && + test_cmp expected result 
+' + +test_expect_success 'diff-index does not examine skip-worktree dirty entries' ' + setup_dirty && + git diff-index HEAD -- 1 > result && + test_cmp expected result +' + +test_expect_success 'diff-files does not examine skip-worktree absent entries' ' + setup_absent && + test -z "$(git diff-files -- one)" +' + +test_expect_success 'diff-files does not examine skip-worktree dirty entries' ' + setup_dirty && + test -z "$(git diff-files -- one)" +' + +test_expect_success 'git-rm succeeds on skip-worktree absent entries' ' + setup_absent && + git rm 1 +' + +test_expect_failure 'commit on skip-worktree absent entries' ' + git reset && + setup_absent && + test_must_fail git commit -m null 1 +' + +test_expect_failure 'commit on skip-worktree dirty entries' ' + git reset && + setup_dirty && + test_must_fail git commit -m null 1 +' + +test_done -- cgit v1.2.3 From da8ba5e7da01be597aa9417c563dbd516ea5f204 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 17 Sep 2009 22:12:17 -0700 Subject: diff-lib.c: fix misleading comments on oneway_diff() 20a16eb (unpack_trees(): fix diff-index regression., 2008-03-10) adjusted diff-index to the new world order since 34110cd (Make 'unpack_trees()' have a separate source and destination index, 2008-03-06). Callbacks are expected to return anything non-negative as "success", and instead of reporting how many index entries they have processed, they are expected to advance o->pos themselves. The code did so, but a stale comment was left behind. Signed-off-by: Junio C Hamano --- diff-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 0c74ef5cbe..adf1c5fdee 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -383,7 +383,7 @@ static inline void skip_same_name(struct cache_entry *ce, struct unpack_trees_op * For diffing, the index is more important, and we only have a * single tree. * - * We're supposed to return how many index entries we want to skip. 
+ * We're supposed to advance o->pos to skip what we have already processed. * * This wrapper makes it all more readable, and takes care of all * the fairly complex unpack_trees() semantic requirements, including -- cgit v1.2.3 From da165f470ede6540873d33cb8bc2ff8e13c60520 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 7 Jan 2010 14:59:54 -0800 Subject: unpack-trees.c: prepare for looking ahead in the index This prepares but does not yet implement a look-ahead in the index entries when traverse-trees.c decides to give us tree entries in an order that does not match what is in the index. A case where a look-ahead in the index is necessary happens when merging branch B into branch A while the index matches the current branch A, using a tree O as their common ancestor, and these three trees look like this: O A B t t t-i t-i t-i t-j t-j t/1 t/2 The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and B first, and notices that A may have a matching "t" behind "t-i" and "t-j" (indeed it does), and tells A to give that entry instead. After unpacking blob "t" from tree B (as it hasn't changed since O in B and A removed it, it will result in its removal), it descends into directory "t/". The side that walked index in parallel to the tree traversal used to be implemented with one pointer, o->pos, that points at the next index entry to be processed. When this happens, the pointer o->pos still points at "t-i" that is the first entry. We should be able to skip "t-i" and "t-j" and locate "t/1" from the index while the recursive invocation of traverse_trees() walks and matches entries found there, and later come back to process "t-i". While that look-ahead is not implemented yet, this adds a flag bit, CE_UNPACKED, to mark the entries in the index that have already been processed. o->pos pointer has been renamed to o->cache_bottom and it points at the first entry that may still need to be processed. 
Signed-off-by: Junio C Hamano --- cache.h | 2 + diff-lib.c | 18 ----- unpack-trees.c | 216 +++++++++++++++++++++++++++++++++++++++++++++------------ unpack-trees.h | 2 +- 4 files changed, 173 insertions(+), 65 deletions(-) (limited to 'diff-lib.c') diff --git a/cache.h b/cache.h index bf468e5235..9a9596386d 100644 --- a/cache.h +++ b/cache.h @@ -178,6 +178,8 @@ struct cache_entry { #define CE_HASHED (0x100000) #define CE_UNHASHED (0x200000) +#define CE_UNPACKED (0x1000000) + /* * Extended on-disk flags */ diff --git a/diff-lib.c b/diff-lib.c index adf1c5fdee..f759917d33 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -359,21 +359,6 @@ static void do_oneway_diff(struct unpack_trees_options *o, show_modified(revs, tree, idx, 1, cached, match_missing); } -static inline void skip_same_name(struct cache_entry *ce, struct unpack_trees_options *o) -{ - int len = ce_namelen(ce); - const struct index_state *index = o->src_index; - - while (o->pos < index->cache_nr) { - struct cache_entry *next = index->cache[o->pos]; - if (len != ce_namelen(next)) - break; - if (memcmp(ce->name, next->name, len)) - break; - o->pos++; - } -} - /* * The unpack_trees() interface is designed for merging, so * the different source entries are designed primarily for @@ -395,9 +380,6 @@ static int oneway_diff(struct cache_entry **src, struct unpack_trees_options *o) struct cache_entry *tree = src[1]; struct rev_info *revs = o->unpack_data; - if (idx && ce_stage(idx)) - skip_same_name(idx, o); - /* * Unpack-trees generates a DF/conflict entry if * there was a directory in the index and a tree diff --git a/unpack-trees.c b/unpack-trees.c index 7413ff0b93..685adb4b77 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -126,18 +126,109 @@ static inline int call_unpack_fn(struct cache_entry **src, struct unpack_trees_o return ret; } -static int unpack_index_entry(struct cache_entry *ce, struct unpack_trees_options *o) +static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o) +{ + 
ce->ce_flags |= CE_UNPACKED; + + if (o->cache_bottom < o->src_index->cache_nr && + o->src_index->cache[o->cache_bottom] == ce) { + int bottom = o->cache_bottom; + while (bottom < o->src_index->cache_nr && + o->src_index->cache[bottom]->ce_flags & CE_UNPACKED) + bottom++; + o->cache_bottom = bottom; + } +} + +static void mark_all_ce_unused(struct index_state *index) +{ + int i; + for (i = 0; i < index->cache_nr; i++) + index->cache[i]->ce_flags &= ~CE_UNPACKED; +} + +static int locate_in_src_index(struct cache_entry *ce, + struct unpack_trees_options *o) +{ + struct index_state *index = o->src_index; + int len = ce_namelen(ce); + int pos = index_name_pos(index, ce->name, len); + if (pos < 0) + pos = -1 - pos; + return pos; +} + +/* + * We call unpack_index_entry() with an unmerged cache entry + * only in diff-index, and it wants a single callback. Skip + * the other unmerged entry with the same name. + */ +static void mark_ce_used_same_name(struct cache_entry *ce, + struct unpack_trees_options *o) +{ + struct index_state *index = o->src_index; + int len = ce_namelen(ce); + int pos; + + for (pos = locate_in_src_index(ce, o); pos < index->cache_nr; pos++) { + struct cache_entry *next = index->cache[pos]; + if (len != ce_namelen(next) || + memcmp(ce->name, next->name, len)) + break; + mark_ce_used(next, o); + } +} + +static struct cache_entry *next_cache_entry(struct unpack_trees_options *o) +{ + const struct index_state *index = o->src_index; + int pos = o->cache_bottom; + + while (pos < index->cache_nr) { + struct cache_entry *ce = index->cache[pos]; + if (!(ce->ce_flags & CE_UNPACKED)) + return ce; + pos++; + } + return NULL; +} + +static void add_same_unmerged(struct cache_entry *ce, + struct unpack_trees_options *o) +{ + struct index_state *index = o->src_index; + int len = ce_namelen(ce); + int pos = index_name_pos(index, ce->name, len); + + if (0 <= pos) + die("programming error in a caller of mark_ce_used_same_name"); + for (pos = -pos - 1; pos < 
index->cache_nr; pos++) { + struct cache_entry *next = index->cache[pos]; + if (len != ce_namelen(next) || + memcmp(ce->name, next->name, len)) + break; + add_entry(o, next, 0, 0); + mark_ce_used(next, o); + } +} + +static int unpack_index_entry(struct cache_entry *ce, + struct unpack_trees_options *o) { struct cache_entry *src[5] = { ce, NULL, }; + int ret; - o->pos++; + mark_ce_used(ce, o); if (ce_stage(ce)) { if (o->skip_unmerged) { add_entry(o, ce, 0, 0); return 0; } } - return call_unpack_fn(src, o); + ret = call_unpack_fn(src, o); + if (ce_stage(ce)) + mark_ce_used_same_name(ce, o); + return ret; } static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long df_conflicts, struct name_entry *names, struct traverse_info *info) @@ -212,6 +303,20 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf return ce_namelen(ce) > traverse_path_len(info, n); } +static int ce_in_traverse_path(const struct cache_entry *ce, + const struct traverse_info *info) +{ + if (!info->prev) + return 1; + if (do_compare_entry(ce, info->prev, &info->name)) + return 0; + /* + * If ce (blob) is the same name as the path (which is a tree + * we will be descending into), it won't be inside it. + */ + return (info->pathlen < ce_namelen(ce)); +} + static struct cache_entry *create_ce_entry(const struct traverse_info *info, const struct name_entry *n, int stage) { int len = traverse_path_len(info, n); @@ -300,23 +405,27 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str /* Are we supposed to look at the index too? 
*/ if (o->merge) { - while (o->pos < o->src_index->cache_nr) { - struct cache_entry *ce = o->src_index->cache[o->pos]; - int cmp = compare_entry(ce, info, p); + while (1) { + struct cache_entry *ce = next_cache_entry(o); + int cmp; + if (!ce) + break; + cmp = compare_entry(ce, info, p); if (cmp < 0) { if (unpack_index_entry(ce, o) < 0) return unpack_failed(o, NULL); continue; } if (!cmp) { - o->pos++; if (ce_stage(ce)) { /* - * If we skip unmerged index entries, we'll skip this - * entry *and* the tree entries associated with it! + * If we skip unmerged index + * entries, we'll skip this + * entry *and* the tree + * entries associated with it! */ if (o->skip_unmerged) { - add_entry(o, ce, 0, 0); + add_same_unmerged(ce, o); return mask; } } @@ -329,6 +438,13 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0) return -1; + if (src[0]) { + if (ce_stage(src[0])) + mark_ce_used_same_name(src[0], o); + else + mark_ce_used(src[0], o); + } + /* Now handle any directories.. */ if (dirmask) { unsigned long conflicts = mask & ~dirmask; @@ -345,11 +461,13 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str matches = cache_tree_matches_traversal(o->src_index->cache_tree, names, info); /* - * Everything under the name matches. Adjust o->pos to - * skip the entire hierarchy. + * Everything under the name matches; skip the + * entire hierarchy. diff_index_cached codepath + * special cases D/F conflicts in such a way that + * it does not do any look-ahead, so this is safe. 
*/ if (matches) { - o->pos += matches; + o->cache_bottom += matches; return mask; } } @@ -382,11 +500,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options memset(&o->result, 0, sizeof(o->result)); o->result.initialized = 1; - if (o->src_index) { - o->result.timestamp.sec = o->src_index->timestamp.sec; - o->result.timestamp.nsec = o->src_index->timestamp.nsec; - } + o->result.timestamp.sec = o->src_index->timestamp.sec; + o->result.timestamp.nsec = o->src_index->timestamp.nsec; o->merge_size = len; + mark_all_ce_unused(o->src_index); if (!dfc) dfc = xcalloc(1, cache_entry_size(0)); @@ -400,18 +517,38 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options info.fn = unpack_callback; info.data = o; + if (o->prefix) { + /* + * Unpack existing index entries that sort before the + * prefix the tree is spliced into. Note that o->merge + * is always true in this case. + */ + while (1) { + struct cache_entry *ce = next_cache_entry(o); + if (!ce) + break; + if (ce_in_traverse_path(ce, &info)) + break; + if (unpack_index_entry(ce, o) < 0) + goto return_failed; + } + } + if (traverse_trees(len, t, &info) < 0) - return unpack_failed(o, NULL); + goto return_failed; } /* Any left-over entries in the index? 
*/ if (o->merge) { - while (o->pos < o->src_index->cache_nr) { - struct cache_entry *ce = o->src_index->cache[o->pos]; + while (1) { + struct cache_entry *ce = next_cache_entry(o); + if (!ce) + break; if (unpack_index_entry(ce, o) < 0) - return unpack_failed(o, NULL); + goto return_failed; } } + mark_all_ce_unused(o->src_index); if (o->trivial_merges_only && o->nontrivial_merge) return unpack_failed(o, "Merge requires file-level merging"); @@ -421,6 +558,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (o->dst_index) *o->dst_index = o->result; return ret; + +return_failed: + mark_all_ce_unused(o->src_index); + return unpack_failed(o, NULL); } /* Here come the merge functions */ @@ -522,7 +663,9 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, * in that directory. */ namelen = strlen(ce->name); - for (i = o->pos; i < o->src_index->cache_nr; i++) { + for (i = locate_in_src_index(ce, o); + i < o->src_index->cache_nr; + i++) { struct cache_entry *ce2 = o->src_index->cache[i]; int len = ce_namelen(ce2); if (len < namelen || @@ -530,12 +673,14 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, ce2->name[namelen] != '/') break; /* - * ce2->name is an entry in the subdirectory. + * ce2->name is an entry in the subdirectory to be + * removed. */ if (!ce_stage(ce2)) { if (verify_uptodate(ce2, o)) return -1; add_entry(o, ce2, CE_REMOVE, 0); + mark_ce_used(ce2, o); } cnt++; } @@ -591,7 +736,6 @@ static int verify_absent(struct cache_entry *ce, const char *action, return 0; if (!lstat(ce->name, &st)) { - int ret; int dtype = ce_to_dtype(ce); struct cache_entry *result; @@ -619,28 +763,8 @@ static int verify_absent(struct cache_entry *ce, const char *action, * files that are in "foo/" we would lose * them. 
*/ - ret = verify_clean_subdirectory(ce, action, o); - if (ret < 0) - return ret; - - /* - * If this removed entries from the index, - * what that means is: - * - * (1) the caller unpack_callback() saw path/foo - * in the index, and it has not removed it because - * it thinks it is handling 'path' as blob with - * D/F conflict; - * (2) we will return "ok, we placed a merged entry - * in the index" which would cause o->pos to be - * incremented by one; - * (3) however, original o->pos now has 'path/foo' - * marked with "to be removed". - * - * We need to increment it by the number of - * deleted entries here. - */ - o->pos += ret; + if (verify_clean_subdirectory(ce, action, o) < 0) + return -1; return 0; } diff --git a/unpack-trees.h b/unpack-trees.h index d19df44f40..9a0733ea85 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -30,7 +30,7 @@ struct unpack_trees_options { diff_index_cached, gently; const char *prefix; - int pos; + int cache_bottom; struct dir_struct *dir; merge_fn_t fn; struct unpack_trees_error_msgs msgs; -- cgit v1.2.3 From 730f72840cc50c523fe4cdd796ea2d2fc4571a28 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 20 Sep 2009 00:03:39 -0700 Subject: unpack-trees.c: look ahead in the index This makes the traversal of index be in sync with the tree traversal. When unpack_callback() is fed a set of tree entries from trees, it inspects the name of the entry and checks if an index entry with the same name could be hiding behind the current index entry, and (1) if the name appears in the index as a leaf node, it is also fed to the n_way_merge() callback function; (2) if the name is a directory in the index, i.e. there are entries in the index that are underneath it, then nothing is fed to the n_way_merge() callback function; (3) otherwise, if the name comes before the first eligible entry in the index, the index entry is first unpacked alone. 
When traverse_trees_recursive() descends into a subdirectory, the cache_bottom pointer is moved to walk index entries within that directory. All of these are omitted for diff-index, which does not even want to be fed an index entry and a tree entry with D/F conflicts. This fixes 3-way read-tree and exposes a bug in other parts of the system in t6035, test #5. The test prepares these three trees: O = HEAD^ 100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d 100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d 100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x A = HEAD 100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d 100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d 100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x B = master 120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b 100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d 100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x With a clean index that matches HEAD, running git read-tree -m -u --aggressive $O $A $B now yields 120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d 100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x which is correct. "master" created "a/b" symlink that did not exist, and removed "a/b/c/d" while HEAD did not touch either path. Before this series, read-tree did not notice the situation and resolved addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had another path "a/b/c/e" added, this merge should conflict but instead it silently resolved "a/b" and then immediately overwrote it to add "a/b/c/e", which was quite bogus. Tests in t1012 start to work with this. 
Signed-off-by: Junio C Hamano --- diff-lib.c | 1 + diff.c | 17 +++++++ diff.h | 1 + t/t1012-read-tree-df.sh | 8 ++-- unpack-trees.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 140 insertions(+), 7 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index f759917d33..c9998f4c91 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -425,6 +425,7 @@ int run_diff_index(struct rev_info *revs, int cached) exit(128); diff_set_mnemonic_prefix(&revs->diffopt, "c/", cached ? "i/" : "w/"); + diffcore_fix_diff_index(&revs->diffopt); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); return 0; diff --git a/diff.c b/diff.c index 08bbd3e907..3bfb4a19d2 100644 --- a/diff.c +++ b/diff.c @@ -3628,6 +3628,23 @@ static void diffcore_skip_stat_unmatch(struct diff_options *diffopt) *q = outq; } +static int diffnamecmp(const void *a_, const void *b_) +{ + const struct diff_filepair *a = *((const struct diff_filepair **)a_); + const struct diff_filepair *b = *((const struct diff_filepair **)b_); + const char *name_a, *name_b; + + name_a = a->one ? a->one->path : a->two->path; + name_b = b->one ? 
b->one->path : b->two->path; + return strcmp(name_a, name_b); +} + +void diffcore_fix_diff_index(struct diff_options *options) +{ + struct diff_queue_struct *q = &diff_queued_diff; + qsort(q->queue, q->nr, sizeof(q->queue[0]), diffnamecmp); +} + void diffcore_std(struct diff_options *options) { if (options->skip_stat_unmatch) diff --git a/diff.h b/diff.h index 15fcecdecd..471f606a92 100644 --- a/diff.h +++ b/diff.h @@ -208,6 +208,7 @@ extern int diff_setup_done(struct diff_options *); #define DIFF_PICKAXE_REGEX 2 extern void diffcore_std(struct diff_options *); +extern void diffcore_fix_diff_index(struct diff_options *); #define COMMON_DIFF_OPTIONS_HELP \ "\ncommon diff options:\n" \ diff --git a/t/t1012-read-tree-df.sh b/t/t1012-read-tree-df.sh index f1e650ac39..9811d467da 100755 --- a/t/t1012-read-tree-df.sh +++ b/t/t1012-read-tree-df.sh @@ -51,7 +51,7 @@ test_expect_success setup ' : ' -test_expect_failure '3-way (1)' ' +test_expect_success '3-way (1)' ' settree A-000 && git read-tree -m -u O-000 A-000 B-000 && checkindex <<-EOF @@ -63,7 +63,7 @@ test_expect_failure '3-way (1)' ' EOF ' -test_expect_failure '3-way (2)' ' +test_expect_success '3-way (2)' ' settree A-001 && git read-tree -m -u O-000 A-001 B-000 && checkindex <<-EOF @@ -76,7 +76,7 @@ test_expect_failure '3-way (2)' ' EOF ' -test_expect_failure '3-way (3)' ' +test_expect_success '3-way (3)' ' settree A-010 && git read-tree -m -u O-010 A-010 B-010 && checkindex <<-EOF @@ -90,7 +90,7 @@ test_expect_failure '3-way (3)' ' EOF ' -test_expect_failure '2-way (1)' ' +test_expect_success '2-way (1)' ' settree O-020 && git read-tree -m -u O-020 A-020 && checkindex <<-EOF diff --git a/unpack-trees.c b/unpack-trees.c index 685adb4b77..74cabc36ff 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -231,9 +231,37 @@ static int unpack_index_entry(struct cache_entry *ce, return ret; } +static int find_cache_pos(struct traverse_info *, const struct name_entry *); + +static void restore_cache_bottom(struct 
traverse_info *info, int bottom) +{ + struct unpack_trees_options *o = info->data; + + if (o->diff_index_cached) + return; + o->cache_bottom = bottom; +} + +static int switch_cache_bottom(struct traverse_info *info) +{ + struct unpack_trees_options *o = info->data; + int ret, pos; + + if (o->diff_index_cached) + return 0; + ret = o->cache_bottom; + pos = find_cache_pos(info->prev, &info->name); + + if (pos < -1) + o->cache_bottom = -2 - pos; + else if (pos < 0) + o->cache_bottom = o->src_index->cache_nr; + return ret; +} + static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long df_conflicts, struct name_entry *names, struct traverse_info *info) { - int i; + int i, ret, bottom; struct tree_desc t[MAX_UNPACK_TREES]; struct traverse_info newinfo; struct name_entry *p; @@ -254,7 +282,11 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long sha1 = names[i].sha1; fill_tree_descriptor(t+i, sha1); } - return traverse_trees(n, t, &newinfo); + + bottom = switch_cache_bottom(&newinfo); + ret = traverse_trees(n, t, &newinfo); + restore_cache_bottom(&newinfo, bottom); + return ret; } /* @@ -393,6 +425,82 @@ static int unpack_failed(struct unpack_trees_options *o, const char *message) return -1; } +/* NEEDSWORK: give this a better name and share with tree-walk.c */ +static int name_compare(const char *a, int a_len, + const char *b, int b_len) +{ + int len = (a_len < b_len) ? a_len : b_len; + int cmp = memcmp(a, b, len); + if (cmp) + return cmp; + return (a_len - b_len); +} + +/* + * The tree traversal is looking at name p. If we have a matching entry, + * return it. If name p is a directory in the index, do not return + * anything, as we will want to match it when the traversal descends into + * the directory. 
+ */ +static int find_cache_pos(struct traverse_info *info, + const struct name_entry *p) +{ + int pos; + struct unpack_trees_options *o = info->data; + struct index_state *index = o->src_index; + int pfxlen = info->pathlen; + int p_len = tree_entry_len(p->path, p->sha1); + + for (pos = o->cache_bottom; pos < index->cache_nr; pos++) { + struct cache_entry *ce = index->cache[pos]; + const char *ce_name, *ce_slash; + int cmp, ce_len; + + if (!ce_in_traverse_path(ce, info)) + continue; + if (ce->ce_flags & CE_UNPACKED) + continue; + ce_name = ce->name + pfxlen; + ce_slash = strchr(ce_name, '/'); + if (ce_slash) + ce_len = ce_slash - ce_name; + else + ce_len = ce_namelen(ce) - pfxlen; + cmp = name_compare(p->path, p_len, ce_name, ce_len); + /* + * Exact match; if we have a directory we need to + * delay returning it. + */ + if (!cmp) + return ce_slash ? -2 - pos : pos; + if (0 < cmp) + continue; /* keep looking */ + /* + * ce_name sorts after p->path; could it be that we + * have files under p->path directory in the index? + * E.g. ce_name == "t-i", and p->path == "t"; we may + * have "t/a" in the index. + */ + if (p_len < ce_len && !memcmp(ce_name, p->path, p_len) && + ce_name[p_len] < '/') + continue; /* keep looking */ + break; + } + return -1; +} + +static struct cache_entry *find_cache_entry(struct traverse_info *info, + const struct name_entry *p) +{ + int pos = find_cache_pos(info, p); + struct unpack_trees_options *o = info->data; + + if (0 <= pos) + return o->src_index->cache[pos]; + else + return NULL; +} + static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info) { struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, }; @@ -406,8 +514,14 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str /* Are we supposed to look at the index too? 
*/ if (o->merge) { while (1) { - struct cache_entry *ce = next_cache_entry(o); int cmp; + struct cache_entry *ce; + + if (o->diff_index_cached) + ce = next_cache_entry(o); + else + ce = find_cache_entry(info, p); + if (!ce) break; cmp = compare_entry(ce, info, p); -- cgit v1.2.3 From ee6fc514f2df821c2719cc49499a56ef2fb136b0 Mon Sep 17 00:00:00 2001 From: Jens Lehmann Date: Sat, 16 Jan 2010 18:42:24 +0100 Subject: Show submodules as modified when they contain a dirty work tree Until now a submodule only then showed up as modified in the supermodule when the last commit in the submodule differed from the one in the index or the diffed against commit of the superproject. A dirty work tree containing new untracked or modified files in a submodule was undetectable when looking at it from the superproject. Now git status and git diff (against the work tree) in the superproject will also display submodules as modified when they contain untracked or modified files, even if the compared ref matches the HEAD of the submodule. 
Signed-off-by: Jens Lehmann Signed-off-by: Nanako Shiraishi Signed-off-by: Junio C Hamano --- diff-lib.c | 8 +++-- submodule.c | 49 ++++++++++++++++++++++++++ submodule.h | 1 + t/t7506-status-submodule.sh | 83 +++++++++++++++++++++++++++++++++++++-------- 4 files changed, 124 insertions(+), 17 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 1c7e652a80..9cdf6daa90 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -10,6 +10,7 @@ #include "cache-tree.h" #include "unpack-trees.h" #include "refs.h" +#include "submodule.h" /* * diff-files @@ -159,7 +160,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) continue; } - if (ce_uptodate(ce) || ce_skip_worktree(ce)) + if ((ce_uptodate(ce) && !S_ISGITLINK(ce->ce_mode)) || ce_skip_worktree(ce)) continue; /* If CE_VALID is set, don't look at workdir for file removal */ @@ -176,6 +177,8 @@ int run_diff_files(struct rev_info *revs, unsigned int option) continue; } changed = ce_match_stat(ce, &st, ce_option); + if (S_ISGITLINK(ce->ce_mode) && !changed) + changed = is_submodule_modified(ce->name); if (!changed) { ce_mark_uptodate(ce); if (!DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)) @@ -230,7 +233,8 @@ static int get_stat_data(struct cache_entry *ce, return -1; } changed = ce_match_stat(ce, &st, 0); - if (changed) { + if (changed + || (S_ISGITLINK(ce->ce_mode) && is_submodule_modified(ce->name))) { mode = ce_mode_from_stat(ce, st.st_mode); sha1 = null_sha1; } diff --git a/submodule.c b/submodule.c index 86aad653b7..3f851deb6a 100644 --- a/submodule.c +++ b/submodule.c @@ -4,6 +4,7 @@ #include "diff.h" #include "commit.h" #include "revision.h" +#include "run-command.h" int add_submodule_odb(const char *path) { @@ -112,3 +113,51 @@ void show_submodule_summary(FILE *f, const char *path, } strbuf_release(&sb); } + +int is_submodule_modified(const char *path) +{ + int len; + struct child_process cp; + const char *argv[] = { + "status", + "--porcelain", + NULL, + }; + char *env[3]; + 
struct strbuf buf = STRBUF_INIT; + + strbuf_addf(&buf, "%s/.git/", path); + if (!is_directory(buf.buf)) { + strbuf_release(&buf); + /* The submodule is not checked out, so it is not modified */ + return 0; + + } + strbuf_reset(&buf); + + strbuf_addf(&buf, "GIT_WORK_TREE=%s", path); + env[0] = strbuf_detach(&buf, NULL); + strbuf_addf(&buf, "GIT_DIR=%s/.git", path); + env[1] = strbuf_detach(&buf, NULL); + env[2] = NULL; + + memset(&cp, 0, sizeof(cp)); + cp.argv = argv; + cp.env = (const char *const *)env; + cp.git_cmd = 1; + cp.no_stdin = 1; + cp.out = -1; + if (start_command(&cp)) + die("Could not run git status --porcelain"); + + len = strbuf_read(&buf, cp.out, 1024); + close(cp.out); + + if (finish_command(&cp)) + die("git status --porcelain failed"); + + free(env[0]); + free(env[1]); + strbuf_release(&buf); + return len != 0; +} diff --git a/submodule.h b/submodule.h index 4c0269d679..0773121eb5 100644 --- a/submodule.h +++ b/submodule.h @@ -4,5 +4,6 @@ void show_submodule_summary(FILE *f, const char *path, unsigned char one[20], unsigned char two[20], const char *del, const char *add, const char *reset); +int is_submodule_modified(const char *path); #endif diff --git a/t/t7506-status-submodule.sh b/t/t7506-status-submodule.sh index 3ca17abad1..253c334319 100755 --- a/t/t7506-status-submodule.sh +++ b/t/t7506-status-submodule.sh @@ -5,34 +5,87 @@ test_description='git status for submodule' . ./test-lib.sh test_expect_success 'setup' ' - test_create_repo sub - cd sub && - : >bar && - git add bar && - git commit -m " Add bar" && - cd .. 
&& - git add sub && + test_create_repo sub && + ( + cd sub && + : >bar && + git add bar && + git commit -m " Add bar" && + : >foo && + git add foo && + git commit -m " Add foo" + ) && + echo output > .gitignore && + git add sub .gitignore && git commit -m "Add submodule sub" ' test_expect_success 'status clean' ' - git status | - grep "nothing to commit" + git status >output && + grep "nothing to commit" output ' + test_expect_success 'commit --dry-run -a clean' ' - git commit --dry-run -a | - grep "nothing to commit" + test_must_fail git commit --dry-run -a >output && + grep "nothing to commit" output +' + +test_expect_success 'status with modified file in submodule' ' + (cd sub && git reset --hard) && + echo "changed" >sub/foo && + git status >output && + grep "modified: sub" output +' + +test_expect_success 'status with modified file in submodule (porcelain)' ' + (cd sub && git reset --hard) && + echo "changed" >sub/foo && + git status --porcelain >output && + diff output - <<-\EOF + M sub + EOF +' + +test_expect_success 'status with added file in submodule' ' + (cd sub && git reset --hard && echo >foo && git add foo) && + git status >output && + grep "modified: sub" output +' + +test_expect_success 'status with added file in submodule (porcelain)' ' + (cd sub && git reset --hard && echo >foo && git add foo) && + git status --porcelain >output && + diff output - <<-\EOF + M sub + EOF +' + +test_expect_success 'status with untracked file in submodule' ' + (cd sub && git reset --hard) && + echo "content" >sub/new-file && + git status >output && + grep "modified: sub" output +' + +test_expect_success 'status with untracked file in submodule (porcelain)' ' + git status --porcelain >output && + diff output - <<-\EOF + M sub + EOF ' + test_expect_success 'rm submodule contents' ' rm -rf sub/* sub/.git ' + test_expect_success 'status clean (empty submodule dir)' ' - git status | - grep "nothing to commit" + git status >output && + grep "nothing to commit" output ' + 
test_expect_success 'status -a clean (empty submodule dir)' ' - git commit --dry-run -a | - grep "nothing to commit" + test_must_fail git commit --dry-run -a >output && + grep "nothing to commit" output ' test_done -- cgit v1.2.3 From e3d42c4773bccebb50f01b108d20b06c6a11e615 Mon Sep 17 00:00:00 2001 From: Jens Lehmann Date: Mon, 18 Jan 2010 21:26:18 +0100 Subject: Performance optimization for detection of modified submodules In the worst case is_submodule_modified() got called three times for each submodule. The information we got from scanning the whole submodule tree the first time can be reused instead. New parameters have been added to diff_change() and diff_addremove(); the information is stored in a new member of struct diff_filespec. Its value is then reused instead of calling is_submodule_modified() again. When no explicit "-dirty" is needed in the output the call to is_submodule_modified() is not necessary when the submodule's HEAD already disagrees with the ref of the superproject, as this alone marks it as modified. To achieve that, get_stat_data() got an extra argument. 
Signed-off-by: Jens Lehmann Signed-off-by: Junio C Hamano --- diff-lib.c | 46 +++++++++++++++++++++++++++++++--------------- diff.c | 15 +++++++++++---- diff.h | 10 ++++++---- diffcore.h | 1 + revision.c | 5 +++-- tree-diff.c | 8 ++++---- 6 files changed, 56 insertions(+), 29 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 9cdf6daa90..23e180eed1 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -73,6 +73,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) unsigned int oldmode, newmode; struct cache_entry *ce = active_cache[i]; int changed; + unsigned dirty_submodule = 0; if (DIFF_OPT_TST(&revs->diffopt, QUICK) && DIFF_OPT_TST(&revs->diffopt, HAS_CHANGES)) @@ -173,12 +174,16 @@ int run_diff_files(struct rev_info *revs, unsigned int option) if (silent_on_removed) continue; diff_addremove(&revs->diffopt, '-', ce->ce_mode, - ce->sha1, ce->name); + ce->sha1, ce->name, 0); continue; } changed = ce_match_stat(ce, &st, ce_option); - if (S_ISGITLINK(ce->ce_mode) && !changed) - changed = is_submodule_modified(ce->name); + if (S_ISGITLINK(ce->ce_mode) + && (!changed || (revs->diffopt.output_format & DIFF_FORMAT_PATCH)) + && is_submodule_modified(ce->name)) { + changed = 1; + dirty_submodule = 1; + } if (!changed) { ce_mark_uptodate(ce); if (!DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)) @@ -188,7 +193,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) newmode = ce_mode_from_stat(ce, st.st_mode); diff_change(&revs->diffopt, oldmode, newmode, ce->sha1, (changed ? 
null_sha1 : ce->sha1), - ce->name); + ce->name, 0, dirty_submodule); } diffcore_std(&revs->diffopt); @@ -204,16 +209,18 @@ int run_diff_files(struct rev_info *revs, unsigned int option) static void diff_index_show_file(struct rev_info *revs, const char *prefix, struct cache_entry *ce, - const unsigned char *sha1, unsigned int mode) + const unsigned char *sha1, unsigned int mode, + unsigned dirty_submodule) { diff_addremove(&revs->diffopt, prefix[0], mode, - sha1, ce->name); + sha1, ce->name, dirty_submodule); } static int get_stat_data(struct cache_entry *ce, const unsigned char **sha1p, unsigned int *modep, - int cached, int match_missing) + int cached, int match_missing, + unsigned *dirty_submodule, int output_format) { const unsigned char *sha1 = ce->sha1; unsigned int mode = ce->ce_mode; @@ -233,8 +240,13 @@ static int get_stat_data(struct cache_entry *ce, return -1; } changed = ce_match_stat(ce, &st, 0); - if (changed - || (S_ISGITLINK(ce->ce_mode) && is_submodule_modified(ce->name))) { + if (S_ISGITLINK(ce->ce_mode) + && (!changed || (output_format & DIFF_FORMAT_PATCH)) + && is_submodule_modified(ce->name)) { + changed = 1; + *dirty_submodule = 1; + } + if (changed) { mode = ce_mode_from_stat(ce, st.st_mode); sha1 = null_sha1; } @@ -251,15 +263,17 @@ static void show_new_file(struct rev_info *revs, { const unsigned char *sha1; unsigned int mode; + unsigned dirty_submodule = 0; /* * New file in the index: it might actually be different in * the working copy. 
*/ - if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) + if (get_stat_data(new, &sha1, &mode, cached, match_missing, + &dirty_submodule, revs->diffopt.output_format) < 0) return; - diff_index_show_file(revs, "+", new, sha1, mode); + diff_index_show_file(revs, "+", new, sha1, mode, dirty_submodule); } static int show_modified(struct rev_info *revs, @@ -270,11 +284,13 @@ static int show_modified(struct rev_info *revs, { unsigned int mode, oldmode; const unsigned char *sha1; + unsigned dirty_submodule = 0; - if (get_stat_data(new, &sha1, &mode, cached, match_missing) < 0) { + if (get_stat_data(new, &sha1, &mode, cached, match_missing, + &dirty_submodule, revs->diffopt.output_format) < 0) { if (report_missing) diff_index_show_file(revs, "-", old, - old->sha1, old->ce_mode); + old->sha1, old->ce_mode, 0); return -1; } @@ -309,7 +325,7 @@ static int show_modified(struct rev_info *revs, return 0; diff_change(&revs->diffopt, oldmode, mode, - old->sha1, sha1, old->name); + old->sha1, sha1, old->name, 0, dirty_submodule); return 0; } @@ -356,7 +372,7 @@ static void do_oneway_diff(struct unpack_trees_options *o, * Something removed from the tree? */ if (!idx) { - diff_index_show_file(revs, "-", tree, tree->sha1, tree->ce_mode); + diff_index_show_file(revs, "-", tree, tree->sha1, tree->ce_mode, 0); return; } diff --git a/diff.c b/diff.c index 750c066a5e..8986873c0e 100644 --- a/diff.c +++ b/diff.c @@ -2032,7 +2032,7 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only) char *data = xmalloc(100), *dirty = ""; /* Are we looking at the work tree? 
*/ - if (!s->sha1_valid && is_submodule_modified(s->path)) + if (!s->sha1_valid && s->dirty_submodule) dirty = "-dirty"; len = snprintf(data, 100, @@ -3719,7 +3719,7 @@ int diff_result_code(struct diff_options *opt, int status) void diff_addremove(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, - const char *concatpath) + const char *concatpath, unsigned dirty_submodule) { struct diff_filespec *one, *two; @@ -3751,8 +3751,10 @@ void diff_addremove(struct diff_options *options, if (addremove != '+') fill_filespec(one, sha1, mode); - if (addremove != '-') + if (addremove != '-') { fill_filespec(two, sha1, mode); + two->dirty_submodule = dirty_submodule; + } diff_queue(&diff_queued_diff, one, two); if (!DIFF_OPT_TST(options, DIFF_FROM_CONTENTS)) @@ -3763,7 +3765,8 @@ void diff_change(struct diff_options *options, unsigned old_mode, unsigned new_mode, const unsigned char *old_sha1, const unsigned char *new_sha1, - const char *concatpath) + const char *concatpath, + unsigned old_dirty_submodule, unsigned new_dirty_submodule) { struct diff_filespec *one, *two; @@ -3776,6 +3779,8 @@ void diff_change(struct diff_options *options, const unsigned char *tmp_c; tmp = old_mode; old_mode = new_mode; new_mode = tmp; tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c; + tmp = old_dirty_submodule; old_dirty_submodule = new_dirty_submodule; + new_dirty_submodule = tmp; } if (options->prefix && @@ -3786,6 +3791,8 @@ void diff_change(struct diff_options *options, two = alloc_filespec(concatpath); fill_filespec(one, old_sha1, old_mode); fill_filespec(two, new_sha1, new_mode); + one->dirty_submodule = old_dirty_submodule; + two->dirty_submodule = new_dirty_submodule; diff_queue(&diff_queued_diff, one, two); if (!DIFF_OPT_TST(options, DIFF_FROM_CONTENTS)) diff --git a/diff.h b/diff.h index 6f6d0ed01d..968a8dce95 100644 --- a/diff.h +++ b/diff.h @@ -14,12 +14,13 @@ typedef void (*change_fn_t)(struct diff_options *options, unsigned old_mode, 
unsigned new_mode, const unsigned char *old_sha1, const unsigned char *new_sha1, - const char *fullpath); + const char *fullpath, + unsigned old_dirty_submodule, unsigned new_dirty_submodule); typedef void (*add_remove_fn_t)(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, - const char *fullpath); + const char *fullpath, unsigned dirty_submodule); typedef void (*diff_format_fn_t)(struct diff_queue_struct *q, struct diff_options *options, void *data); @@ -177,13 +178,14 @@ extern void diff_addremove(struct diff_options *, int addremove, unsigned mode, const unsigned char *sha1, - const char *fullpath); + const char *fullpath, unsigned dirty_submodule); extern void diff_change(struct diff_options *, unsigned mode1, unsigned mode2, const unsigned char *sha1, const unsigned char *sha2, - const char *fullpath); + const char *fullpath, + unsigned dirty_submodule1, unsigned dirty_submodule2); extern void diff_unmerge(struct diff_options *, const char *path, diff --git a/diffcore.h b/diffcore.h index 5b634585e8..66687c3fe5 100644 --- a/diffcore.h +++ b/diffcore.h @@ -42,6 +42,7 @@ struct diff_filespec { #define DIFF_FILE_VALID(spec) (((spec)->mode) != 0) unsigned should_free : 1; /* data should be free()'ed */ unsigned should_munmap : 1; /* data should be munmap()'ed */ + unsigned dirty_submodule : 1; /* For submodules: its work tree is dirty */ struct userdiff_driver *driver; /* data should be considered "binary"; -1 means "don't know yet" */ diff --git a/revision.c b/revision.c index 25fa14d93e..769cfd4251 100644 --- a/revision.c +++ b/revision.c @@ -268,7 +268,7 @@ static int tree_difference = REV_TREE_SAME; static void file_add_remove(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, - const char *fullpath) + const char *fullpath, unsigned dirty_submodule) { int diff = addremove == '+' ? 
REV_TREE_NEW : REV_TREE_OLD; @@ -281,7 +281,8 @@ static void file_change(struct diff_options *options, unsigned old_mode, unsigned new_mode, const unsigned char *old_sha1, const unsigned char *new_sha1, - const char *fullpath) + const char *fullpath, + unsigned old_dirty_submodule, unsigned new_dirty_submodule) { tree_difference = REV_TREE_DIFFERENT; DIFF_OPT_SET(options, HAS_CHANGES); diff --git a/tree-diff.c b/tree-diff.c index 7d745b4406..fe9f52c479 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -68,7 +68,7 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) { newbase[baselen + pathlen1] = 0; opt->change(opt, mode1, mode2, - sha1, sha2, newbase); + sha1, sha2, newbase, 0, 0); newbase[baselen + pathlen1] = '/'; } retval = diff_tree_sha1(sha1, sha2, newbase, opt); @@ -77,7 +77,7 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const } fullname = malloc_fullname(base, baselen, path1, pathlen1); - opt->change(opt, mode1, mode2, sha1, sha2, fullname); + opt->change(opt, mode1, mode2, sha1, sha2, fullname, 0, 0); free(fullname); return 0; } @@ -241,7 +241,7 @@ static void show_entry(struct diff_options *opt, const char *prefix, struct tree if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) { newbase[baselen + pathlen] = 0; - opt->add_remove(opt, *prefix, mode, sha1, newbase); + opt->add_remove(opt, *prefix, mode, sha1, newbase, 0); newbase[baselen + pathlen] = '/'; } @@ -252,7 +252,7 @@ static void show_entry(struct diff_options *opt, const char *prefix, struct tree free(newbase); } else { char *fullname = malloc_fullname(base, baselen, path, pathlen); - opt->add_remove(opt, prefix[0], mode, sha1, fullname); + opt->add_remove(opt, prefix[0], mode, sha1, fullname, 0); free(fullname); } } -- cgit v1.2.3 From 125fd98434ce773de45c4a40927c222ec5c43ae1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 24 Jan 2010 00:10:20 -0800 Subject: Make ce_uptodate() trustworthy again The 
rule has always been that a cache entry that is ce_uptodate(ce) means that we already have checked the work tree entity and we know there is no change in the work tree compared to the index, and nobody should have to double check. Note that false ce_uptodate(ce) does not mean it is known to be dirty---it only means we don't know if it is clean. There are a few codepaths (refresh-index and preload-index are among them) that mark a cache entry as up-to-date based solely on the return value from ie_match_stat(); this function uses lstat() to see if the work tree entity has been touched, and for a submodule entry, if its HEAD points at the same commit as the commit recorded in the index of the superproject (a submodule that is not even cloned is considered clean). A submodule is no longer considered unmodified merely because its HEAD matches the index of the superproject these days, in order to prevent people from forgetting to commit in the submodule and updating the superproject index with the new submodule commit, before committing the state in the superproject. However, the patch to do so didn't update the codepath that marks cache entries up-to-date based on the updated definition and instead worked around it by saying "we don't trust the return value of ce_uptodate() for submodules." This makes ce_uptodate() trustworthy again by not marking submodule entries up-to-date. The next step _could_ be to introduce a few "in-core" flag bits to cache_entry structure to record "this entry is _known_ to be dirty", call is_submodule_modified() from ie_match_stat(), and use these new bits to avoid running this rather expensive check more than once, but that can be a separate patch. 
Signed-off-by: Junio C Hamano --- diff-lib.c | 2 +- preload-index.c | 2 ++ read-cache.c | 6 ++++-- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index 23e180eed1..c6c425e624 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -161,7 +161,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) continue; } - if ((ce_uptodate(ce) && !S_ISGITLINK(ce->ce_mode)) || ce_skip_worktree(ce)) + if (ce_uptodate(ce) || ce_skip_worktree(ce)) continue; /* If CE_VALID is set, don't look at workdir for file removal */ diff --git a/preload-index.c b/preload-index.c index 92899333c2..e3d0bda31a 100644 --- a/preload-index.c +++ b/preload-index.c @@ -47,6 +47,8 @@ static void *preload_thread(void *_data) if (ce_stage(ce)) continue; + if (S_ISGITLINK(ce->ce_mode)) + continue; if (ce_uptodate(ce)) continue; if (!ce_path_match(ce, p->pathspec)) diff --git a/read-cache.c b/read-cache.c index 79938bf09a..309b77a6c9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -612,7 +612,8 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, st, ce_option)) { /* Nothing changed, really */ free(ce); - ce_mark_uptodate(alias); + if (!S_ISGITLINK(alias->ce_mode)) + ce_mark_uptodate(alias); alias->ce_flags |= CE_ADDED; return 0; } @@ -1050,7 +1051,8 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, * because CE_UPTODATE flag is in-core only; * we are not going to write this change out. */ - ce_mark_uptodate(ce); + if (!S_ISGITLINK(ce->ce_mode)) + ce_mark_uptodate(ce); return ce; } } -- cgit v1.2.3 From 4d34477f4c5dbebc55aa1362fd705440590a85f1 Mon Sep 17 00:00:00 2001 From: Jens Lehmann Date: Sat, 23 Jan 2010 17:37:26 +0100 Subject: git diff: Don't test submodule dirtiness with --ignore-submodules The diff family suppresses the output of submodule changes when requested but checks them nonetheless. 
But since recently submodules get examined for their dirtiness, which is rather expensive. There is no need to do that when the --ignore-submodules option is used, as the gathered information is never used anyway. Signed-off-by: Jens Lehmann Signed-off-by: Junio C Hamano --- diff-lib.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'diff-lib.c') diff --git a/diff-lib.c b/diff-lib.c index c6c425e624..899034d354 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -179,6 +179,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) } changed = ce_match_stat(ce, &st, ce_option); if (S_ISGITLINK(ce->ce_mode) + && !DIFF_OPT_TST(&revs->diffopt, IGNORE_SUBMODULES) && (!changed || (revs->diffopt.output_format & DIFF_FORMAT_PATCH)) && is_submodule_modified(ce->name)) { changed = 1; @@ -220,7 +221,7 @@ static int get_stat_data(struct cache_entry *ce, const unsigned char **sha1p, unsigned int *modep, int cached, int match_missing, - unsigned *dirty_submodule, int output_format) + unsigned *dirty_submodule, struct diff_options *diffopt) { const unsigned char *sha1 = ce->sha1; unsigned int mode = ce->ce_mode; @@ -241,7 +242,8 @@ static int get_stat_data(struct cache_entry *ce, } changed = ce_match_stat(ce, &st, 0); if (S_ISGITLINK(ce->ce_mode) - && (!changed || (output_format & DIFF_FORMAT_PATCH)) + && !DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES) + && (!changed || (diffopt->output_format & DIFF_FORMAT_PATCH)) && is_submodule_modified(ce->name)) { changed = 1; *dirty_submodule = 1; @@ -270,7 +272,7 @@ static void show_new_file(struct rev_info *revs, * the working copy. 
*/ if (get_stat_data(new, &sha1, &mode, cached, match_missing, - &dirty_submodule, revs->diffopt.output_format) < 0) + &dirty_submodule, &revs->diffopt) < 0) return; diff_index_show_file(revs, "+", new, sha1, mode, dirty_submodule); @@ -287,7 +289,7 @@ static int show_modified(struct rev_info *revs, unsigned dirty_submodule = 0; if (get_stat_data(new, &sha1, &mode, cached, match_missing, - &dirty_submodule, revs->diffopt.output_format) < 0) { + &dirty_submodule, &revs->diffopt) < 0) { if (report_missing) diff_index_show_file(revs, "-", old, old->sha1, old->ce_mode, 0); -- cgit v1.2.3