From 78db6ea9dc1a872f9d07a36fe7aec700a5c963b9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 2 Feb 2012 03:18:29 -0500 Subject: grep: make locking flag global The low-level grep code traditionally didn't care about threading, as it doesn't do any threading itself and didn't call out to other non-thread-safe code. That changed with 0579f91 (grep: enable threading with -p and -W using lazy attribute lookup, 2011-12-12), which pushed the lookup of funcname attributes (which is not thread-safe) into the low-level grep code. As a result, the low-level code learned about a new global "grep_attr_mutex" to serialize access to the attribute code. A multi-threaded caller (e.g., builtin/grep.c) is expected to initialize the mutex and set "use_threads" in the grep_opt structure. The low-level code only uses the lock if use_threads is set. However, putting the use_threads flag into the grep_opt struct is not the most logical place. Whether threading is in use is not something that matters for each call to grep_buffer, but is instead global to the whole program (i.e., if any thread is doing multi-threaded grep, every other thread, even if it thinks it is doing its own single-threaded grep, would need to use the locking). In practice, this distinction isn't a problem for us, because the only user of multi-threaded grep is "git-grep", which does nothing except call grep. This patch turns the opt->use_threads flag into a global flag. More important than the nit-picking semantic argument above is that this means that the locking functions don't need to actually have access to a grep_opt to know whether to lock. Which in turn can make adding new locks simpler, as we don't need to pass around a grep_opt. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'builtin/grep.c') diff --git a/builtin/grep.c b/builtin/grep.c index 9ce064ac11..5b881870f6 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -259,6 +259,7 @@ static void start_threads(struct grep_opt *opt) pthread_cond_init(&cond_add, NULL); pthread_cond_init(&cond_write, NULL); pthread_cond_init(&cond_result, NULL); + grep_use_locks = 1; for (i = 0; i < ARRAY_SIZE(todo); i++) { strbuf_init(&todo[i].out, 0); @@ -307,6 +308,7 @@ static int wait_all(void) pthread_cond_destroy(&cond_add); pthread_cond_destroy(&cond_write); pthread_cond_destroy(&cond_result); + grep_use_locks = 0; return hit; } @@ -1030,8 +1032,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) use_threads = 0; #endif - opt.use_threads = use_threads; - #ifndef NO_PTHREADS if (use_threads) { if (opt.pre_context || opt.post_context || opt.file_break || -- cgit v1.2.3 From b3aeb285d0ac1dcb4d578a61a68e08646f96501f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 2 Feb 2012 03:18:41 -0500 Subject: grep: move sha1-reading mutex into low-level code The multi-threaded git-grep code needs to serialize access to the thread-unsafe read_sha1_file call. It does this with a mutex that is local to builtin/grep.c. Let's instead push this down into grep.c, where it can be used by both builtin/grep.c and grep.c. This will let us safely teach the low-level grep.c code tricks that involve reading from the object db. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 29 ++++++----------------------- grep.c | 6 ++++++ grep.h | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 23 deletions(-) (limited to 'builtin/grep.c') diff --git a/builtin/grep.c b/builtin/grep.c index 5b881870f6..aeb361663a 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -85,21 +85,6 @@ static inline void grep_unlock(void) pthread_mutex_unlock(&grep_mutex); } -/* Used to serialize calls to read_sha1_file. */ -static pthread_mutex_t read_sha1_mutex; - -static inline void read_sha1_lock(void) -{ - if (use_threads) - pthread_mutex_lock(&read_sha1_mutex); -} - -static inline void read_sha1_unlock(void) -{ - if (use_threads) - pthread_mutex_unlock(&read_sha1_mutex); -} - /* Signalled when a new work_item is added to todo. */ static pthread_cond_t cond_add; @@ -254,7 +239,7 @@ static void start_threads(struct grep_opt *opt) int i; pthread_mutex_init(&grep_mutex, NULL); - pthread_mutex_init(&read_sha1_mutex, NULL); + pthread_mutex_init(&grep_read_mutex, NULL); pthread_mutex_init(&grep_attr_mutex, NULL); pthread_cond_init(&cond_add, NULL); pthread_cond_init(&cond_write, NULL); @@ -303,7 +288,7 @@ static int wait_all(void) } pthread_mutex_destroy(&grep_mutex); - pthread_mutex_destroy(&read_sha1_mutex); + pthread_mutex_destroy(&grep_read_mutex); pthread_mutex_destroy(&grep_attr_mutex); pthread_cond_destroy(&cond_add); pthread_cond_destroy(&cond_write); @@ -313,8 +298,6 @@ static int wait_all(void) return hit; } #else /* !NO_PTHREADS */ -#define read_sha1_lock() -#define read_sha1_unlock() static int wait_all(void) { @@ -376,9 +359,9 @@ static void *lock_and_read_sha1_file(const unsigned char *sha1, enum object_type { void *data; - read_sha1_lock(); + grep_read_lock(); data = read_sha1_file(sha1, type, size); - read_sha1_unlock(); + grep_read_unlock(); return data; } @@ -617,10 +600,10 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, struct strbuf base; int hit, len; - read_sha1_lock(); + grep_read_lock(); data = read_object_with_reference(obj->sha1, tree_type, &size, NULL); - read_sha1_unlock(); + grep_read_unlock(); if (!data) die(_("unable to read tree (%s)"), sha1_to_hex(obj->sha1)); diff --git a/grep.c b/grep.c index 7a67c2ff6f..db58a29c2b 100644 --- a/grep.c +++ b/grep.c @@ -826,6 +826,12 @@ static inline void grep_attr_unlock(void) if (grep_use_locks) pthread_mutex_unlock(&grep_attr_mutex); } + +/* + * Same as git_attr_mutex, but protecting the thread-unsafe object db access. + */ +pthread_mutex_t grep_read_mutex; + #else #define grep_attr_lock() #define grep_attr_unlock() diff --git a/grep.h b/grep.h index 3653bb333c..4f1b0251b0 100644 --- a/grep.h +++ b/grep.h @@ -139,6 +139,23 @@ extern int grep_threads_ok(const struct grep_opt *opt); */ extern int grep_use_locks; extern pthread_mutex_t grep_attr_mutex; +extern pthread_mutex_t grep_read_mutex; + +static inline void grep_read_lock(void) +{ + if (grep_use_locks) + pthread_mutex_lock(&grep_read_mutex); +} + +static inline void grep_read_unlock(void) +{ + if (grep_use_locks) + pthread_mutex_unlock(&grep_read_mutex); +} + +#else +#define grep_read_lock() +#define grep_read_unlock() #endif #endif -- cgit v1.2.3 From 8f24a6323ece9be1bf1a04b4b5856112438337f2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 2 Feb 2012 03:19:37 -0500 Subject: convert git-grep to use grep_source interface The grep_source interface (as opposed to grep_buffer) will eventually gives us a richer interface for telling the low-level grep code about our buffers. Eventually this will lead to things like better binary-file handling. For now, it lets us drop a lot of now-redundant code. The conversion is mostly straight-forward. One thing to note is that the memory ownership rules for "struct grep_source" are different than the "struct work_item" found here (the former will copy things like the filename, rather than taking ownership). Therefore you will also see some slight tweaking of when filename buffers are released. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 142 ++++++++++----------------------------------------------- 1 file changed, 23 insertions(+), 119 deletions(-) (limited to 'builtin/grep.c') diff --git a/builtin/grep.c b/builtin/grep.c index aeb361663a..c9b6a138fe 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -29,25 +29,12 @@ static int use_threads = 1; #define THREADS 8 static pthread_t threads[THREADS]; -static void *load_sha1(const unsigned char *sha1, unsigned long *size, - const char *name); -static void *load_file(const char *filename, size_t *sz); - -enum work_type {WORK_SHA1, WORK_FILE}; - /* We use one producer thread and THREADS consumer * threads. The producer adds struct work_items to 'todo' and the * consumers pick work items from the same array. */ struct work_item { - enum work_type type; - char *name; - - /* if type == WORK_SHA1, then 'identifier' is a SHA1, - * otherwise type == WORK_FILE, and 'identifier' is a NUL - * terminated filename. - */ - void *identifier; + struct grep_source source; char done; struct strbuf out; }; @@ -98,7 +85,8 @@ static pthread_cond_t cond_result; static int skip_first_line; -static void add_work(enum work_type type, char *name, void *id) +static void add_work(enum grep_source_type type, const char *name, + const void *id) { grep_lock(); @@ -106,9 +94,7 @@ static void add_work(enum work_type type, char *name, void *id) pthread_cond_wait(&cond_write, &grep_mutex); } - todo[todo_end].type = type; - todo[todo_end].name = name; - todo[todo_end].identifier = id; + grep_source_init(&todo[todo_end].source, type, name, id); todo[todo_end].done = 0; strbuf_reset(&todo[todo_end].out); todo_end = (todo_end + 1) % ARRAY_SIZE(todo); @@ -136,21 +122,6 @@ static struct work_item *get_work(void) return ret; } -static void grep_sha1_async(struct grep_opt *opt, char *name, - const unsigned char *sha1) -{ - unsigned char *s; - s = xmalloc(20); - memcpy(s, sha1, 20); - add_work(WORK_SHA1, name, s); -} - -static void grep_file_async(struct grep_opt *opt, char *name, - const char *filename) -{ - add_work(WORK_FILE, name, xstrdup(filename)); -} - static void work_done(struct work_item *w) { int old_done; @@ -177,8 +148,7 @@ static void work_done(struct work_item *w) write_or_die(1, p, len); } - free(w->name); - free(w->identifier); + grep_source_clear(&w->source); } if (old_done != todo_done) @@ -201,25 +171,8 @@ static void *run(void *arg) break; opt->output_priv = w; - if (w->type == WORK_SHA1) { - unsigned long sz; - void* data = load_sha1(w->identifier, &sz, w->name); - - if (data) { - hit |= grep_buffer(opt, w->name, data, sz); - free(data); - } - } else if (w->type == WORK_FILE) { - size_t sz; - void* data = load_file(w->identifier, &sz); - if (data) { - hit |= grep_buffer(opt, w->name, data, sz); - free(data); - } - } else { - assert(0); - } - + hit |= grep_source(opt, &w->source); + grep_source_clear_data(&w->source); work_done(w); } free_grep_patterns(arg); @@ -365,23 +318,10 @@ static void *lock_and_read_sha1_file(const unsigned char *sha1, enum object_type return data; } -static void *load_sha1(const unsigned char *sha1, unsigned long *size, - const char *name) -{ - enum object_type type; - void *data = lock_and_read_sha1_file(sha1, &type, size); - - if (!data) - error(_("'%s': unable to read %s"), name, sha1_to_hex(sha1)); - - return data; -} - static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *filename, int tree_name_len) { struct strbuf pathbuf = STRBUF_INIT; - char *name; if (opt->relative && opt->prefix_length) { quote_path_relative(filename + tree_name_len, -1, &pathbuf, @@ -391,87 +331,51 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, strbuf_addstr(&pathbuf, filename); } - name = strbuf_detach(&pathbuf, NULL); - #ifndef NO_PTHREADS if (use_threads) { - grep_sha1_async(opt, name, sha1); + add_work(GREP_SOURCE_SHA1, pathbuf.buf, sha1); + strbuf_release(&pathbuf); return 0; } else #endif { + struct grep_source gs; int hit; - unsigned long sz; - void *data = load_sha1(sha1, &sz, name); - if (!data) - hit = 0; - else - hit = grep_buffer(opt, name, data, sz); - free(data); - free(name); - return hit; - } -} + grep_source_init(&gs, GREP_SOURCE_SHA1, pathbuf.buf, sha1); + strbuf_release(&pathbuf); + hit = grep_source(opt, &gs); -static void *load_file(const char *filename, size_t *sz) -{ - struct stat st; - char *data; - int i; - - if (lstat(filename, &st) < 0) { - err_ret: - if (errno != ENOENT) - error(_("'%s': %s"), filename, strerror(errno)); - return NULL; - } - if (!S_ISREG(st.st_mode)) - return NULL; - *sz = xsize_t(st.st_size); - i = open(filename, O_RDONLY); - if (i < 0) - goto err_ret; - data = xmalloc(*sz + 1); - if (st.st_size != read_in_full(i, data, *sz)) { - error(_("'%s': short read %s"), filename, strerror(errno)); - close(i); - free(data); - return NULL; + grep_source_clear(&gs); + return hit; } - close(i); - data[*sz] = 0; - return data; } static int grep_file(struct grep_opt *opt, const char *filename) { struct strbuf buf = STRBUF_INIT; - char *name; if (opt->relative && opt->prefix_length) quote_path_relative(filename, -1, &buf, opt->prefix); else strbuf_addstr(&buf, filename); - name = strbuf_detach(&buf, NULL); #ifndef NO_PTHREADS if (use_threads) { - grep_file_async(opt, name, filename); + add_work(GREP_SOURCE_FILE, buf.buf, filename); + strbuf_release(&buf); return 0; } else #endif { + struct grep_source gs; int hit; - size_t sz; - void *data = load_file(filename, &sz); - if (!data) - hit = 0; - else - hit = grep_buffer(opt, name, data, sz); - free(data); - free(name); + grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename); + strbuf_release(&buf); + hit = grep_source(opt, &gs); + + grep_source_clear(&gs); return hit; } } -- cgit v1.2.3 From 9dd5245c1043dd18fd7b3f44b9e51eef7e4b58d8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 2 Feb 2012 03:24:28 -0500 Subject: grep: pre-load userdiff drivers when threaded The low-level grep_source code will automatically load the userdiff driver to see whether a file is binary. However, when we are threaded, it will load the drivers in a non-deterministic order, handling each one as its assigned thread happens to be scheduled. Meanwhile, the attribute lookup code (which underlies the userdiff driver lookup) is optimized to handle paths in sequential order (because they tend to share the same gitattributes files). Multi-threading the lookups destroys the locality and makes this optimization less effective. We can fix this by pre-loading the userdiff driver in the main thread, before we hand off the file to a worker thread. My best-of-five for "git grep foo" on the linux-2.6 repository went from: real 0m0.391s user 0m1.708s sys 0m0.584s to: real 0m0.360s user 0m1.576s sys 0m0.572s Not a huge speedup, but it's quite easy to do. The only trick is that we shouldn't perform this optimization if "-a" was used, in which case we won't bother checking whether the files are binary at all. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'builtin/grep.c') diff --git a/builtin/grep.c b/builtin/grep.c index c9b6a138fe..e741aca18c 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -85,8 +85,8 @@ static pthread_cond_t cond_result; static int skip_first_line; -static void add_work(enum grep_source_type type, const char *name, - const void *id) +static void add_work(struct grep_opt *opt, enum grep_source_type type, + const char *name, const void *id) { grep_lock(); @@ -95,6 +95,8 @@ static void add_work(enum grep_source_type type, const char *name, } grep_source_init(&todo[todo_end].source, type, name, id); + if (opt->binary != GREP_BINARY_TEXT) + grep_source_load_driver(&todo[todo_end].source); todo[todo_end].done = 0; strbuf_reset(&todo[todo_end].out); todo_end = (todo_end + 1) % ARRAY_SIZE(todo); @@ -333,7 +335,7 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, #ifndef NO_PTHREADS if (use_threads) { - add_work(GREP_SOURCE_SHA1, pathbuf.buf, sha1); + add_work(opt, GREP_SOURCE_SHA1, pathbuf.buf, sha1); strbuf_release(&pathbuf); return 0; } else @@ -362,7 +364,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) #ifndef NO_PTHREADS if (use_threads) { - add_work(GREP_SOURCE_FILE, buf.buf, filename); + add_work(opt, GREP_SOURCE_FILE, buf.buf, filename); strbuf_release(&buf); return 0; } else -- cgit v1.2.3 From 6680a0874f86600cc8b4b5327384934d83d752a8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 7 Feb 2012 13:23:02 -0500 Subject: drop odd return value semantics from userdiff_config When the userdiff_config function was introduced in be58e70 (diff: unify external diff and funcname parsing code, 2008-10-05), it used a return value convention unlike any other config callback. Like other callbacks, it used "-1" to signal error. But it returned "1" to indicate that it found something, and "0" otherwise; other callbacks simply returned "0" to indicate that no error occurred. This distinction was necessary at the time, because the userdiff namespace overlapped slightly with the color configuration namespace. So "diff.color.foo" could mean "the 'foo' slot of diff coloring" or "the 'foo' component of the "color" userdiff driver". Because the color-parsing code would die on an unknown color slot, we needed the userdiff code to indicate that it had matched the variable, letting us bypass the color-parsing code entirely. Later, in 8b8e862 (ignore unknown color configuration, 2009-12-12), the color-parsing code learned to silently ignore unknown slots. This means we no longer need to protect userdiff-matched variables from reaching the color-parsing code. We can therefore change the userdiff_config calling convention to a more normal one. This drops some code from each caller, which is nice. But more importantly, it reduces the cognitive load for readers who may wonder why userdiff_config is unlike every other config callback. There's no need to add a new test confirming that this works; t4020 already contains a test that sets diff.color.external. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 8 +------- builtin/cat-file.c | 8 +------- builtin/grep.c | 7 ++----- diff.c | 7 ++----- userdiff.c | 19 ++++++------------- 5 files changed, 12 insertions(+), 37 deletions(-) (limited to 'builtin/grep.c') diff --git a/builtin/blame.c b/builtin/blame.c index 5a67c202f0..01956c8081 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2050,14 +2050,8 @@ static int git_blame_config(const char *var, const char *value, void *cb) return 0; } - switch (userdiff_config(var, value)) { - case 0: - break; - case -1: + if (userdiff_config(var, value) < 0) return -1; - default: - return 0; - } return git_default_config(var, value, cb); } diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 07bd984084..8ed501f220 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -226,14 +226,8 @@ static const char * const cat_file_usage[] = { static int git_cat_file_config(const char *var, const char *value, void *cb) { - switch (userdiff_config(var, value)) { - case 0: - break; - case -1: + if (userdiff_config(var, value) < 0) return -1; - default: - return 0; - } return git_default_config(var, value, cb); } diff --git a/builtin/grep.c b/builtin/grep.c index 5c2ae94e55..dc6de83ab7 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -325,11 +325,8 @@ static int grep_config(const char *var, const char *value, void *cb) struct grep_opt *opt = cb; char *color = NULL; - switch (userdiff_config(var, value)) { - case 0: break; - case -1: return -1; - default: return 0; - } + if (userdiff_config(var, value) < 0) + return -1; if (!strcmp(var, "grep.extendedregexp")) { if (git_config_bool(var, value)) diff --git a/diff.c b/diff.c index 7e154265f7..a656f5e83e 100644 --- a/diff.c +++ b/diff.c @@ -177,11 +177,8 @@ int git_diff_basic_config(const char *var, const char *value, void *cb) return 0; } - switch (userdiff_config(var, value)) { - case 0: break; - case -1: return -1; - default: return 0; - } + if (userdiff_config(var, value) < 0) + return -1; if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) { int slot = parse_diff_color_slot(var, 11); diff --git a/userdiff.c b/userdiff.c index 76109da4bc..1e7184f7f0 100644 --- a/userdiff.c +++ b/userdiff.c @@ -210,14 +210,7 @@ static int parse_funcname(struct userdiff_funcname *f, const char *k, if (git_config_string(&f->pattern, k, v) < 0) return -1; f->cflags = cflags; - return 1; -} - -static int parse_string(const char **d, const char *k, const char *v) -{ - if (git_config_string(d, k, v) < 0) - return -1; - return 1; + return 0; } static int parse_tristate(int *b, const char *k, const char *v) @@ -226,13 +219,13 @@ static int parse_tristate(int *b, const char *k, const char *v) *b = -1; else *b = git_config_bool(k, v); - return 1; + return 0; } static int parse_bool(int *b, const char *k, const char *v) { *b = git_config_bool(k, v); - return 1; + return 0; } int userdiff_config(const char *k, const char *v) @@ -246,13 +239,13 @@ int userdiff_config(const char *k, const char *v) if ((drv = parse_driver(k, v, "binary"))) return parse_tristate(&drv->binary, k, v); if ((drv = parse_driver(k, v, "command"))) - return parse_string(&drv->external, k, v); + return git_config_string(&drv->external, k, v); if ((drv = parse_driver(k, v, "textconv"))) - return parse_string(&drv->textconv, k, v); + return git_config_string(&drv->textconv, k, v); if ((drv = parse_driver(k, v, "cachetextconv"))) return parse_bool(&drv->textconv_want_cache, k, v); if ((drv = parse_driver(k, v, "wordregex"))) - return parse_string(&drv->word_regex, k, v); + return git_config_string(&drv->word_regex, k, v); return 0; } -- cgit v1.2.3