From 3ffefb54c0515308ceafb6ba071567d9fd379498 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:36:52 -0400 Subject: commit_tree: take a pointer/len pair rather than a const strbuf While strbufs are pretty common throughout our code, it is more flexible for functions to take a pointer/len pair than a strbuf. It's easy to turn a strbuf into such a pair (by dereferencing its members), but less easy to go the other way (you can strbuf_attach, but that has implications about memory ownership). This patch teaches commit_tree (and its associated callers and sub-functions) to take such a pair for the commit message rather than a strbuf. This makes passing the buffer around slightly more verbose, but means we can get rid of some dangerous strbuf_attach calls in the next patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index f4793316a2..bd3d5afcd6 100644 --- a/commit.c +++ b/commit.c @@ -1344,7 +1344,8 @@ void free_commit_extra_headers(struct commit_extra_header *extra) } } -int commit_tree(const struct strbuf *msg, const unsigned char *tree, +int commit_tree(const char *msg, size_t msg_len, + const unsigned char *tree, struct commit_list *parents, unsigned char *ret, const char *author, const char *sign_commit) { @@ -1352,7 +1353,7 @@ int commit_tree(const struct strbuf *msg, const unsigned char *tree, int result; append_merge_tag_headers(parents, &tail); - result = commit_tree_extended(msg, tree, parents, ret, + result = commit_tree_extended(msg, msg_len, tree, parents, ret, author, sign_commit, extra); free_commit_extra_headers(extra); return result; @@ -1473,7 +1474,8 @@ static const char commit_utf8_warn[] = "You may want to amend it after fixing the message, or set the config\n" "variable i18n.commitencoding to the encoding your project uses.\n"; -int commit_tree_extended(const struct strbuf *msg, const unsigned char *tree, +int commit_tree_extended(const char *msg, size_t msg_len, + const unsigned char *tree, struct commit_list *parents, unsigned char *ret, const char *author, const char *sign_commit, struct commit_extra_header *extra) @@ -1484,7 +1486,7 @@ int commit_tree_extended(const struct strbuf *msg, const unsigned char *tree, assert_sha1_type(tree, OBJ_TREE); - if (memchr(msg->buf, '\0', msg->len)) + if (memchr(msg, '\0', msg_len)) return error("a NUL byte in commit log message not allowed."); /* Not having i18n.commitencoding is the same as having utf-8 */ @@ -1523,7 +1525,7 @@ int commit_tree_extended(const struct strbuf *msg, const unsigned char *tree, strbuf_addch(&buffer, '\n'); /* And add the comment */ - strbuf_addbuf(&buffer, msg); + strbuf_add(&buffer, msg, msg_len); /* And check the encoding */ if (encoding_is_utf8 && !verify_utf8(&buffer)) -- cgit v1.2.3 From 969eba6341a5af8ac52c67e26462548ed05e23e3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:39:04 -0400 Subject: commit: push commit_index update into alloc_commit_node Whenever we create a commit object via lookup_commit, we give it a unique index to be used with the commit-slab API. The theory is that any "struct commit" we create would follow this code path, so any such struct would get an index. However, callers could use alloc_commit_node() directly (and get multiple commits with index 0). Let's push the indexing into alloc_commit_node so that it's hard for callers to get it wrong. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- alloc.c | 12 ++++++++++-- commit.c | 2 -- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'commit.c') diff --git a/alloc.c b/alloc.c index 38ff7e7e8e..eb22a45c9d 100644 --- a/alloc.c +++ b/alloc.c @@ -47,10 +47,18 @@ union any_object { DEFINE_ALLOCATOR(blob, struct blob) DEFINE_ALLOCATOR(tree, struct tree) -DEFINE_ALLOCATOR(commit, struct commit) +DEFINE_ALLOCATOR(raw_commit, struct commit) DEFINE_ALLOCATOR(tag, struct tag) DEFINE_ALLOCATOR(object, union any_object) +void *alloc_commit_node(void) +{ + static int commit_count; + struct commit *c = alloc_raw_commit_node(); + c->index = commit_count++; + return c; +} + static void report(const char *name, unsigned int count, size_t size) { fprintf(stderr, "%10s: %8u (%"PRIuMAX" kB)\n", @@ -64,7 +72,7 @@ void alloc_report(void) { REPORT(blob, struct blob); REPORT(tree, struct tree); - REPORT(commit, struct commit); + REPORT(raw_commit, struct commit); REPORT(tag, struct tag); REPORT(object, union any_object); } diff --git a/commit.c b/commit.c index bd3d5afcd6..fbdc480ccf 100644 --- a/commit.c +++ b/commit.c @@ -17,7 +17,6 @@ static struct commit_extra_header *read_commit_extra_header_lines(const char *bu int save_commit_buffer = 1; const char *commit_type = "commit"; -static int commit_count; static struct commit *check_commit(struct object *obj, const unsigned char *sha1, @@ -64,7 +63,6 @@ struct commit *lookup_commit(const unsigned char *sha1) struct object *obj = lookup_object(sha1); if (!obj) { struct commit *c = alloc_commit_node(); - c->index = commit_count++; return create_object(sha1, OBJ_COMMIT, c); } if (!obj->type) -- cgit v1.2.3 From 0fb370da9ca972f9571530f95c0dacb31368c280 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 12 Jun 2014 18:05:37 -0400 Subject: provide a helper to free commit buffer This converts two lines into one at each caller. But more importantly, it abstracts the concept of freeing the buffer, which will make it easier to change later. Note that we also need to provide a "detach" mechanism for a tricky case in index-pack. We are passed a buffer for the object generated by processing the incoming pack. If we are not using --strict, we just calculate the sha1 on that buffer and return, leaving the caller to free it. But if we are using --strict, we actually attach that buffer to an object, pass the object to the fsck functions, and then detach the buffer from the object again (so that the caller can free it as usual). In this case, we don't want to free the buffer ourselves, but just make sure it is no longer associated with the commit. Note that we are making the assumption here that the attach/detach process does not impact the buffer at all (e.g., it is never reallocated or modified). That holds true now, and we have no plans to change that. However, as we abstract the commit_buffer code, this dependency becomes less obvious. So when we detach, let's also make sure that we get back the same buffer that we gave to the commit_buffer code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 3 +-- builtin/index-pack.c | 3 ++- builtin/log.c | 6 ++---- builtin/rev-list.c | 3 +-- commit.c | 13 +++++++++++++ commit.h | 11 +++++++++++ 6 files changed, 30 insertions(+), 9 deletions(-) (limited to 'commit.c') diff --git a/builtin/fsck.c b/builtin/fsck.c index fc150c8821..8aadca160e 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -310,8 +310,7 @@ static int fsck_obj(struct object *obj) if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); if (!commit->parents && show_root) printf("root %s\n", sha1_to_hex(commit->object.sha1)); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index b9f6e12c0e..42551ce4ff 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -774,7 +774,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, } if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - commit->buffer = NULL; + if (detach_commit_buffer(commit) != data) + die("BUG: parse_object_buffer transmogrified our buffer"); } obj->flags |= FLAG_CHECKED; } diff --git a/builtin/log.c b/builtin/log.c index 39e8836352..226f8f2980 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -349,8 +349,7 @@ static int cmd_log_walk(struct rev_info *rev) rev->max_count++; if (!rev->reflog_info) { /* we allow cycles in reflog ancestry */ - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); } free_commit_list(commit->parents); commit->parents = NULL; @@ -1508,8 +1507,7 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix) reopen_stdout(rev.numbered_files ? NULL : commit, NULL, &rev, quiet)) die(_("Failed to create output files")); shown = log_tree_commit(&rev, commit); - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); /* We put one extra blank line between formatted * patches and this flag is used by log-tree code diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 9f92905379..e012ebe502 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -173,8 +173,7 @@ static void finish_commit(struct commit *commit, void *data) free_commit_list(commit->parents); commit->parents = NULL; } - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); } static void finish_object(struct object *obj, diff --git a/commit.c b/commit.c index fbdc480ccf..11a05c1f24 100644 --- a/commit.c +++ b/commit.c @@ -245,6 +245,19 @@ int unregister_shallow(const unsigned char *sha1) return 0; } +void free_commit_buffer(struct commit *commit) +{ + free(commit->buffer); + commit->buffer = NULL; +} + +const void *detach_commit_buffer(struct commit *commit) +{ + void *ret = commit->buffer; + commit->buffer = NULL; + return ret; +} + int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size) { const char *tail = buffer; diff --git a/commit.h b/commit.h index 4df48cb688..d72ed43340 100644 --- a/commit.h +++ b/commit.h @@ -51,6 +51,17 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s int parse_commit(struct commit *item); void parse_commit_or_die(struct commit *item); +/* + * Free any cached object buffer associated with the commit. + */ +void free_commit_buffer(struct commit *); + +/* + * Disassociate any cached object buffer from the commit, but do not free it. + * The buffer (or NULL, if none) is returned. + */ +const void *detach_commit_buffer(struct commit *); + /* Find beginning and length of commit subject. */ int find_commit_subject(const char *commit_buffer, const char **subject); -- cgit v1.2.3 From 66c2827ea4deb24ff541e30a5b6239ad5e9f6801 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:40:14 -0400 Subject: provide a helper to set the commit buffer Right now this is just a one-liner, but abstracting it will make it easier to change later. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- commit.c | 7 ++++++- commit.h | 6 ++++++ object.c | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) (limited to 'commit.c') diff --git a/builtin/blame.c b/builtin/blame.c index 85a3681306..38784ab9d6 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2046,7 +2046,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ident, ident, path, (!contents_from ? path : (!strcmp(contents_from, "-") ? "standard input" : contents_from))); - commit->buffer = strbuf_detach(&msg, NULL); + set_commit_buffer(commit, strbuf_detach(&msg, NULL)); if (!contents_from || strcmp("-", contents_from)) { struct stat st; diff --git a/commit.c b/commit.c index 11a05c1f24..fc8b4e287d 100644 --- a/commit.c +++ b/commit.c @@ -245,6 +245,11 @@ int unregister_shallow(const unsigned char *sha1) return 0; } +void set_commit_buffer(struct commit *commit, void *buffer) +{ + commit->buffer = buffer; +} + void free_commit_buffer(struct commit *commit) { free(commit->buffer); @@ -335,7 +340,7 @@ int parse_commit(struct commit *item) } ret = parse_commit_buffer(item, buffer, size); if (save_commit_buffer && !ret) { - item->buffer = buffer; + set_commit_buffer(item, buffer); return 0; } free(buffer); diff --git a/commit.h b/commit.h index d72ed43340..cc89128894 100644 --- a/commit.h +++ b/commit.h @@ -51,6 +51,12 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s int parse_commit(struct commit *item); void parse_commit_or_die(struct commit *item); +/* + * Associate an object buffer with the commit. The ownership of the + * memory is handed over to the commit, and must be free()-able. + */ +void set_commit_buffer(struct commit *, void *buffer); + /* * Free any cached object buffer associated with the commit. */ diff --git a/object.c b/object.c index 57a0890a87..44ca657204 100644 --- a/object.c +++ b/object.c @@ -198,7 +198,7 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t if (parse_commit_buffer(commit, buffer, size)) return NULL; if (!commit->buffer) { - commit->buffer = buffer; + set_commit_buffer(commit, buffer); *eaten_p = 1; } obj = &commit->object; -- cgit v1.2.3 From 152ff1ccebd822fd97f27d2a6c3fa2058f088fd8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:40:39 -0400 Subject: provide helpers to access the commit buffer Many sites look at commit->buffer to get more detailed information than what is in the parsed commit struct. However, we sometimes drop commit->buffer to save memory, in which case the caller would need to read the object afresh. Some callers do this (leading to duplicated code), and others do not (which opens the possibility of a segfault if somebody else frees the buffer). Let's provide a pair of helpers, "get" and "unuse", that let callers easily get the buffer. They will use the cached buffer when possible, and otherwise load from disk using read_sha1_file. Note that we also need to add a "get_cached" variant which returns NULL when we do not have a cached buffer. At first glance this seems to defeat the purpose of "get", which is to always provide a return value. However, some log code paths actually use the NULL-ness of commit->buffer as a boolean flag to decide whether to try printing the commit. At least for now, we want to continue supporting that use. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 28 ++++++++++++++++++++++++++++ commit.h | 21 +++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'commit.c') diff --git a/commit.c b/commit.c index fc8b4e287d..b6b0e0d6fb 100644 --- a/commit.c +++ b/commit.c @@ -250,6 +250,34 @@ void set_commit_buffer(struct commit *commit, void *buffer) commit->buffer = buffer; } +const void *get_cached_commit_buffer(const struct commit *commit) +{ + return commit->buffer; +} + +const void *get_commit_buffer(const struct commit *commit) +{ + const void *ret = get_cached_commit_buffer(commit); + if (!ret) { + enum object_type type; + unsigned long size; + ret = read_sha1_file(commit->object.sha1, &type, &size); + if (!ret) + die("cannot read commit object %s", + sha1_to_hex(commit->object.sha1)); + if (type != OBJ_COMMIT) + die("expected commit for %s, got %s", + sha1_to_hex(commit->object.sha1), typename(type)); + } + return ret; +} + +void unuse_commit_buffer(const struct commit *commit, const void *buffer) +{ + if (commit->buffer != buffer) + free((void *)buffer); +} + void free_commit_buffer(struct commit *commit) { free(commit->buffer); diff --git a/commit.h b/commit.h index cc89128894..259c0aec75 100644 --- a/commit.h +++ b/commit.h @@ -57,6 +57,27 @@ void parse_commit_or_die(struct commit *item); */ void set_commit_buffer(struct commit *, void *buffer); +/* + * Get any cached object buffer associated with the commit. Returns NULL + * if none. The resulting memory should not be freed. + */ +const void *get_cached_commit_buffer(const struct commit *); + +/* + * Get the commit's object contents, either from cache or by reading the object + * from disk. The resulting memory should not be modified, and must be given + * to unuse_commit_buffer when the caller is done. + */ +const void *get_commit_buffer(const struct commit *); + +/* + * Tell the commit subsytem that we are done with a particular commit buffer. + * The commit and buffer should be the input and return value, respectively, + * from an earlier call to get_commit_buffer. The buffer may or may not be + * freed by this call; callers should not access the memory afterwards. + */ +void unuse_commit_buffer(const struct commit *, const void *buffer); + /* * Free any cached object buffer associated with the commit. */ -- cgit v1.2.3 From ba41c1c93fd9109eae954f75a8cb8e32c3e29530 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:41:02 -0400 Subject: use get_commit_buffer to avoid duplicate code For both of these sites, we already do the "fallback to read_sha1_file" trick. But we can shorten the code by just using get_commit_buffer. Note that the error cases are slightly different when read_sha1_file fails. get_commit_buffer will die() if the object cannot be loaded, or is a non-commit. For get_sha1_oneline, this will almost certainly never happen, as we will have just called parse_object (and if it does, it's probably worth complaining about). For record_author_date, the new behavior is probably better; we notify the user of the error instead of silently ignoring it. And because it's used only for sorting by author-date, somebody examining a corrupt repo can fallback to the regular traversal order. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 16 +++------------- sha1_name.c | 18 ++++-------------- 2 files changed, 7 insertions(+), 27 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index b6b0e0d6fb..1903dde285 100644 --- a/commit.c +++ b/commit.c @@ -583,22 +583,12 @@ static void record_author_date(struct author_date_slab *author_date, struct commit *commit) { const char *buf, *line_end, *ident_line; - char *buffer = NULL; + const char *buffer = get_commit_buffer(commit); struct ident_split ident; char *date_end; unsigned long date; - if (!commit->buffer) { - unsigned long size; - enum object_type type; - buffer = read_sha1_file(commit->object.sha1, &type, &size); - if (!buffer) - return; - } - - for (buf = commit->buffer ? commit->buffer : buffer; - buf; - buf = line_end + 1) { + for (buf = buffer; buf; buf = line_end + 1) { line_end = strchrnul(buf, '\n'); ident_line = skip_prefix(buf, "author "); if (!ident_line) { @@ -619,7 +609,7 @@ static void record_author_date(struct author_date_slab *author_date, *(author_date_slab_at(author_date, commit)) = date; fail_exit: - free(buffer); + unuse_commit_buffer(commit, buffer); } static int compare_commits_by_author_date(const void *a_, const void *b_, diff --git a/sha1_name.c b/sha1_name.c index 2b6322fad0..0a65d234de 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -862,27 +862,17 @@ static int get_sha1_oneline(const char *prefix, unsigned char *sha1, commit_list_insert(l->item, &backup); } while (list) { - char *p, *to_free = NULL; + const char *p, *buf; struct commit *commit; - enum object_type type; - unsigned long size; int matches; commit = pop_most_recent_commit(&list, ONELINE_SEEN); if (!parse_object(commit->object.sha1)) continue; - if (commit->buffer) - p = commit->buffer; - else { - p = read_sha1_file(commit->object.sha1, &type, &size); - if (!p) - continue; - to_free = p; - } - - p = strstr(p, "\n\n"); + buf = get_commit_buffer(commit); + p = strstr(buf, "\n\n"); matches = p && !regexec(®ex, p + 2, 0, NULL, 0); - free(to_free); + unuse_commit_buffer(commit, buf); if (matches) { hashcpy(sha1, commit->object.sha1); -- cgit v1.2.3 From c1b3c71f4b4571abb2b2a457122fd100dc9f7eb0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:43:02 -0400 Subject: commit: convert commit->buffer to a slab This will make it easier to manage the buffer cache independently of the "struct commit" objects. It also shrinks "struct commit" by one pointer, which may be helpful. Unfortunately it does not reduce the max memory size of something like "rev-list", because rev-list uses get_cached_commit_buffer() to decide not to show each commit's output (and due to the design of slab_at, accessing the slab requires us to extend it, allocating exactly the same number of buffer pointers we dropped from the commit structs). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 20 +++++++++++++------- commit.h | 1 - 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 1903dde285..e289c78327 100644 --- a/commit.c +++ b/commit.c @@ -245,14 +245,17 @@ int unregister_shallow(const unsigned char *sha1) return 0; } +define_commit_slab(buffer_slab, void *); +static struct buffer_slab buffer_slab = COMMIT_SLAB_INIT(1, buffer_slab); + void set_commit_buffer(struct commit *commit, void *buffer) { - commit->buffer = buffer; + *buffer_slab_at(&buffer_slab, commit) = buffer; } const void *get_cached_commit_buffer(const struct commit *commit) { - return commit->buffer; + return *buffer_slab_at(&buffer_slab, commit); } const void *get_commit_buffer(const struct commit *commit) @@ -274,20 +277,23 @@ const void *get_commit_buffer(const struct commit *commit) void unuse_commit_buffer(const struct commit *commit, const void *buffer) { - if (commit->buffer != buffer) + void *cached = *buffer_slab_at(&buffer_slab, commit); + if (cached != buffer) free((void *)buffer); } void free_commit_buffer(struct commit *commit) { - free(commit->buffer); - commit->buffer = NULL; + void **b = buffer_slab_at(&buffer_slab, commit); + free(*b); + *b = NULL; } const void *detach_commit_buffer(struct commit *commit) { - void *ret = commit->buffer; - commit->buffer = NULL; + void **b = buffer_slab_at(&buffer_slab, commit); + void *ret = *b; + *b = NULL; return ret; } diff --git a/commit.h b/commit.h index 5ce5ce72c3..e1c25692f1 100644 --- a/commit.h +++ b/commit.h @@ -20,7 +20,6 @@ struct commit { unsigned long date; struct commit_list *parents; struct tree *tree; - char *buffer; }; extern int save_commit_buffer; -- cgit v1.2.3 From 8597ea3afea067b39ba7d4adae7ec6c1ee0e7c91 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:44:13 -0400 Subject: commit: record buffer length in cache Most callsites which use the commit buffer try to use the cached version attached to the commit, rather than re-reading from disk. Unfortunately, that interface provides only a pointer to the NUL-terminated buffer, with no indication of the original length. For the most part, this doesn't matter. People do not put NULs in their commit messages, and the log code is happy to treat it all as a NUL-terminated string. However, some code paths do care. For example, when checking signatures, we want to be very careful that we verify all the bytes to avoid malicious trickery. This patch just adds an optional "size" out-pointer to get_commit_buffer and friends. The existing callers all pass NULL (there did not seem to be any obvious sites where we could avoid an immediate strlen() call, though perhaps with some further refactoring we could). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 14 ++++++++++++- builtin/fast-export.c | 2 +- builtin/fmt-merge-msg.c | 2 +- builtin/index-pack.c | 2 +- builtin/log.c | 2 +- builtin/rev-list.c | 2 +- commit.c | 54 ++++++++++++++++++++++++++++++++----------------- commit.h | 8 ++++---- fsck.c | 2 +- log-tree.c | 2 +- merge-recursive.c | 2 +- notes-merge.c | 2 +- object.c | 4 ++-- pretty.c | 4 ++-- sequencer.c | 2 +- sha1_name.c | 2 +- 16 files changed, 68 insertions(+), 38 deletions(-) (limited to 'commit.c') diff --git a/builtin/blame.c b/builtin/blame.c index 857d98a324..b84e375b5c 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1998,6 +1998,18 @@ static void append_merge_parents(struct commit_list **tail) strbuf_release(&line); } +/* + * This isn't as simple as passing sb->buf and sb->len, because we + * want to transfer ownership of the buffer to the commit (so we + * must use detach). + */ +static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf *sb) +{ + size_t len; + void *buf = strbuf_detach(sb, &len); + set_commit_buffer(c, buf, len); +} + /* * Prepare a dummy commit that represents the work tree (or staged) item. * Note that annotating work tree item never works in the reverse. @@ -2046,7 +2058,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ident, ident, path, (!contents_from ? path : (!strcmp(contents_from, "-") ? "standard input" : contents_from))); - set_commit_buffer(commit, strbuf_detach(&msg, NULL)); + set_commit_buffer_from_strbuf(commit, &msg); if (!contents_from || strcmp("-", contents_from)) { struct stat st; diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 7ee5e08442..05d161f19f 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -289,7 +289,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) rev->diffopt.output_format = DIFF_FORMAT_CALLBACK; parse_commit_or_die(commit); - commit_buffer = get_commit_buffer(commit); + commit_buffer = get_commit_buffer(commit, NULL); author = strstr(commit_buffer, "\nauthor "); if (!author) die ("Could not find author in commit %s", diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index 01f6d59eef..ef8b254ef2 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -236,7 +236,7 @@ static void record_person(int which, struct string_list *people, const char *field; field = (which == 'a') ? "\nauthor " : "\ncommitter "; - buffer = get_commit_buffer(commit); + buffer = get_commit_buffer(commit, NULL); name = strstr(buffer, field); if (!name) return; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 42551ce4ff..459b9f07bb 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -774,7 +774,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, } if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - if (detach_commit_buffer(commit) != data) + if (detach_commit_buffer(commit, NULL) != data) die("BUG: parse_object_buffer transmogrified our buffer"); } obj->flags |= FLAG_CHECKED; diff --git a/builtin/log.c b/builtin/log.c index 2c742606bc..c599eacf72 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -919,7 +919,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, &need_8bit_cte); for (i = 0; !need_8bit_cte && i < nr; i++) { - const char *buf = get_commit_buffer(list[i]); + const char *buf = get_commit_buffer(list[i], NULL); if (has_non_ascii(buf)) need_8bit_cte = 1; unuse_commit_buffer(list[i], buf); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 3fcbf21c03..ff84a825ff 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -106,7 +106,7 @@ static void show_commit(struct commit *commit, void *data) else putchar('\n'); - if (revs->verbose_header && get_cached_commit_buffer(commit)) { + if (revs->verbose_header && get_cached_commit_buffer(commit, NULL)) { struct strbuf buf = STRBUF_INIT; struct pretty_print_context ctx = {0}; ctx.abbrev = revs->abbrev; diff --git a/commit.c b/commit.c index e289c78327..a036e181c7 100644 --- a/commit.c +++ b/commit.c @@ -245,22 +245,31 @@ int unregister_shallow(const unsigned char *sha1) return 0; } -define_commit_slab(buffer_slab, void *); +struct commit_buffer { + void *buffer; + unsigned long size; +}; +define_commit_slab(buffer_slab, struct commit_buffer); static struct buffer_slab buffer_slab = COMMIT_SLAB_INIT(1, buffer_slab); -void set_commit_buffer(struct commit *commit, void *buffer) +void set_commit_buffer(struct commit *commit, void *buffer, unsigned long size) { - *buffer_slab_at(&buffer_slab, commit) = buffer; + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + v->buffer = buffer; + v->size = size; } -const void *get_cached_commit_buffer(const struct commit *commit) +const void *get_cached_commit_buffer(const struct commit *commit, unsigned long *sizep) { - return *buffer_slab_at(&buffer_slab, commit); + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + if (sizep) + *sizep = v->size; + return v->buffer; } -const void *get_commit_buffer(const struct commit *commit) +const void *get_commit_buffer(const struct commit *commit, unsigned long *sizep) { - const void *ret = get_cached_commit_buffer(commit); + const void *ret = get_cached_commit_buffer(commit, sizep); if (!ret) { enum object_type type; unsigned long size; @@ -271,29 +280,38 @@ const void *get_commit_buffer(const struct commit *commit) if (type != OBJ_COMMIT) die("expected commit for %s, got %s", sha1_to_hex(commit->object.sha1), typename(type)); + if (sizep) + *sizep = size; } return ret; } void unuse_commit_buffer(const struct commit *commit, const void *buffer) { - void *cached = *buffer_slab_at(&buffer_slab, commit); - if (cached != buffer) + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + if (v->buffer != buffer) free((void *)buffer); } void free_commit_buffer(struct commit *commit) { - void **b = buffer_slab_at(&buffer_slab, commit); - free(*b); - *b = NULL; + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + free(v->buffer); + v->buffer = NULL; + v->size = 0; } -const void *detach_commit_buffer(struct commit *commit) +const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) { - void **b = buffer_slab_at(&buffer_slab, commit); - void *ret = *b; - *b = NULL; + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + void *ret; + + ret = v->buffer; + if (sizep) + *sizep = v->size; + + v->buffer = NULL; + v->size = 0; return ret; } @@ -374,7 +392,7 @@ int parse_commit(struct commit *item) } ret = parse_commit_buffer(item, buffer, size); if (save_commit_buffer && !ret) { - set_commit_buffer(item, buffer); + set_commit_buffer(item, buffer, size); return 0; } free(buffer); @@ -589,7 +607,7 @@ static void record_author_date(struct author_date_slab *author_date, struct commit *commit) { const char *buf, *line_end, *ident_line; - const char *buffer = get_commit_buffer(commit); + const char *buffer = get_commit_buffer(commit, NULL); struct ident_split ident; char *date_end; unsigned long date; diff --git a/commit.h b/commit.h index e1c25692f1..61559a9d45 100644 --- a/commit.h +++ b/commit.h @@ -54,20 +54,20 @@ void parse_commit_or_die(struct commit *item); * Associate an object buffer with the commit. The ownership of the * memory is handed over to the commit, and must be free()-able. */ -void set_commit_buffer(struct commit *, void *buffer); +void set_commit_buffer(struct commit *, void *buffer, unsigned long size); /* * Get any cached object buffer associated with the commit. Returns NULL * if none. The resulting memory should not be freed. */ -const void *get_cached_commit_buffer(const struct commit *); +const void *get_cached_commit_buffer(const struct commit *, unsigned long *size); /* * Get the commit's object contents, either from cache or by reading the object * from disk. The resulting memory should not be modified, and must be given * to unuse_commit_buffer when the caller is done. */ -const void *get_commit_buffer(const struct commit *); +const void *get_commit_buffer(const struct commit *, unsigned long *size); /* * Tell the commit subsytem that we are done with a particular commit buffer. @@ -86,7 +86,7 @@ void free_commit_buffer(struct commit *); * Disassociate any cached object buffer from the commit, but do not free it. * The buffer (or NULL, if none) is returned. */ -const void *detach_commit_buffer(struct commit *); +const void *detach_commit_buffer(struct commit *, unsigned long *sizep); /* Find beginning and length of commit subject. */ int find_commit_subject(const char *commit_buffer, const char **subject); diff --git a/fsck.c b/fsck.c index 8223780592..a7233c8d0b 100644 --- a/fsck.c +++ b/fsck.c @@ -339,7 +339,7 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer, static int fsck_commit(struct commit *commit, fsck_error error_func) { - const char *buffer = get_commit_buffer(commit); + const char *buffer = get_commit_buffer(commit, NULL); int ret = fsck_commit_buffer(commit, buffer, error_func); unuse_commit_buffer(commit, buffer); return ret; diff --git a/log-tree.c b/log-tree.c index e9ef8abd37..444702163a 100644 --- a/log-tree.c +++ b/log-tree.c @@ -588,7 +588,7 @@ void show_log(struct rev_info *opt) show_mergetag(opt, commit); } - if (!get_cached_commit_buffer(commit)) + if (!get_cached_commit_buffer(commit, NULL)) return; if (opt->show_notes) { diff --git a/merge-recursive.c b/merge-recursive.c index 78908aaacc..a9ab328923 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -190,7 +190,7 @@ static void output_commit_title(struct merge_options *o, struct commit *commit) printf(_("(bad commit)\n")); else { const char *title; - const char *msg = get_commit_buffer(commit); + const char *msg = get_commit_buffer(commit, NULL); int len = find_commit_subject(msg, &title); if (len) printf("%.*s\n", len, title); diff --git a/notes-merge.c b/notes-merge.c index e804db2d02..fd5fae255d 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -672,7 +672,7 @@ int notes_merge_commit(struct notes_merge_options *o, DIR *dir; struct dirent *e; struct strbuf path = STRBUF_INIT; - const char *buffer = get_commit_buffer(partial_commit); + const char *buffer = get_commit_buffer(partial_commit, NULL); const char *msg = strstr(buffer, "\n\n"); int baselen; diff --git a/object.c b/object.c index 67b6e3533d..9c31e9a5e0 100644 --- a/object.c +++ b/object.c @@ -197,8 +197,8 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t if (commit) { if (parse_commit_buffer(commit, buffer, size)) return NULL; - if (!get_cached_commit_buffer(commit)) { - set_commit_buffer(commit, buffer); + if (!get_cached_commit_buffer(commit, NULL)) { + set_commit_buffer(commit, buffer, size); *eaten_p = 1; } obj = &commit->object; diff --git a/pretty.c b/pretty.c index 915bd1e2e9..b9fceedbb9 100644 --- a/pretty.c +++ b/pretty.c @@ -613,7 +613,7 @@ const char *logmsg_reencode(const struct commit *commit, static const char *utf8 = "UTF-8"; const char *use_encoding; char *encoding; - const char *msg = get_commit_buffer(commit); + const char *msg = get_commit_buffer(commit, NULL); char *out; if (!output_encoding || !*output_encoding) { @@ -642,7 +642,7 @@ const char *logmsg_reencode(const struct commit *commit, * the cached copy from get_commit_buffer, we need to duplicate it * to avoid munging the cached copy. */ - if (msg == get_cached_commit_buffer(commit)) + if (msg == get_cached_commit_buffer(commit, NULL)) out = xstrdup(msg); else out = (char *)msg; diff --git a/sequencer.c b/sequencer.c index 69bcf3d801..bbaddcb05a 100644 --- a/sequencer.c +++ b/sequencer.c @@ -662,7 +662,7 @@ static int format_todo(struct strbuf *buf, struct commit_list *todo_list, int subject_len; for (cur = todo_list; cur; cur = cur->next) { - const char *commit_buffer = get_commit_buffer(cur->item); + const char *commit_buffer = get_commit_buffer(cur->item, NULL); sha1_abbrev = find_unique_abbrev(cur->item->object.sha1, DEFAULT_ABBREV); subject_len = find_commit_subject(commit_buffer, &subject); strbuf_addf(buf, "%s %s %.*s\n", action_str, sha1_abbrev, diff --git a/sha1_name.c b/sha1_name.c index 0a65d234de..c2c938c4e1 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -869,7 +869,7 @@ static int get_sha1_oneline(const char *prefix, unsigned char *sha1, commit = pop_most_recent_commit(&list, ONELINE_SEEN); if (!parse_object(commit->object.sha1)) continue; - buf = get_commit_buffer(commit); + buf = get_commit_buffer(commit, NULL); p = strstr(buf, "\n\n"); matches = p && !regexec(®ex, p + 2, 0, NULL, 0); unuse_commit_buffer(commit, buf); -- cgit v1.2.3 From 218aa3a6162b80696a82b8745daa38fa826985ae Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 13 Jun 2014 02:32:11 -0400 Subject: reuse cached commit buffer when parsing signatures When we call show_signature or show_mergetag, we read the commit object fresh via read_sha1_file and reparse its headers. However, in most cases we already have the object data available, attached to the "struct commit". This is partially laziness in dealing with the memory allocation issues, but partially defensive programming, in that we would always want to verify a clean version of the buffer (not one that might have been munged by other users of the commit). However, we do not currently ever munge the commit buffer, and not using the already-available buffer carries a fairly big performance penalty when we are looking at a large number of commits. Here are timings on linux.git: [baseline, no signatures] $ time git log >/dev/null real 0m4.902s user 0m4.784s sys 0m0.120s [before] $ time git log --show-signature >/dev/null real 0m14.735s user 0m9.964s sys 0m0.944s [after] $ time git log --show-signature >/dev/null real 0m9.981s user 0m5.260s sys 0m0.936s Note that our user CPU time drops almost in half, close to the non-signature case, but we do still spend more wall-clock and system time, presumably from dealing with gpg. An alternative to this is to note that most commits do not have signatures (less than 1% in this repo), yet we pay the re-parsing cost for every commit just to find out if it has a mergetag or signature. If we checked that when parsing the commit initially, we could avoid re-examining most commits later on. Even if we did pursue that direction, however, this would still speed up the cases where we _do_ have signatures. So it's probably worth doing either way. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 27 ++++++++++----------------- commit.h | 2 +- log-tree.c | 2 +- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index a036e181c7..ebd7ad8465 100644 --- a/commit.c +++ b/commit.c @@ -1138,17 +1138,14 @@ static int do_sign_commit(struct strbuf *buf, const char *keyid) return 0; } -int parse_signed_commit(const unsigned char *sha1, +int parse_signed_commit(const struct commit *commit, struct strbuf *payload, struct strbuf *signature) { + unsigned long size; - enum object_type type; - char *buffer = read_sha1_file(sha1, &type, &size); + const char *buffer = get_commit_buffer(commit, &size); int in_signature, saw_signature = -1; - char *line, *tail; - - if (!buffer || type != OBJ_COMMIT) - goto cleanup; + const char *line, *tail; line = buffer; tail = buffer + size; @@ -1156,7 +1153,7 @@ int parse_signed_commit(const unsigned char *sha1, saw_signature = 0; while (line < tail) { const char *sig = NULL; - char *next = memchr(line, '\n', tail - line); + const char *next = memchr(line, '\n', tail - line); next = next ? next + 1 : tail; if (in_signature && line[0] == ' ') @@ -1177,8 +1174,7 @@ int parse_signed_commit(const unsigned char *sha1, } line = next; } - cleanup: - free(buffer); + unuse_commit_buffer(commit, buffer); return saw_signature; } @@ -1269,8 +1265,7 @@ void check_commit_signature(const struct commit* commit, struct signature_check sigc->result = 'N'; - if (parse_signed_commit(commit->object.sha1, - &payload, &signature) <= 0) + if (parse_signed_commit(commit, &payload, &signature) <= 0) goto out; status = verify_signed_buffer(payload.buf, payload.len, signature.buf, signature.len, @@ -1315,11 +1310,9 @@ struct commit_extra_header *read_commit_extra_headers(struct commit *commit, { struct commit_extra_header *extra = NULL; unsigned long size; - enum object_type type; - char *buffer = read_sha1_file(commit->object.sha1, &type, &size); - if (buffer && type == OBJ_COMMIT) - extra = read_commit_extra_header_lines(buffer, size, exclude); - free(buffer); + const char *buffer = get_commit_buffer(commit, &size); + extra = read_commit_extra_header_lines(buffer, size, exclude); + unuse_commit_buffer(commit, buffer); return extra; } diff --git a/commit.h b/commit.h index 61559a9d45..2e1492a6e4 100644 --- a/commit.h +++ b/commit.h @@ -325,7 +325,7 @@ struct merge_remote_desc { */ struct commit *get_merge_parent(const char *name); -extern int parse_signed_commit(const unsigned char *sha1, +extern int parse_signed_commit(const struct commit *commit, struct strbuf *message, struct strbuf *signature); extern void print_commit_list(struct commit_list *list, const char *format_cur, diff --git a/log-tree.c b/log-tree.c index 444702163a..10e68442b3 100644 --- a/log-tree.c +++ b/log-tree.c @@ -376,7 +376,7 @@ static void show_signature(struct rev_info *opt, struct commit *commit) struct strbuf gpg_output = STRBUF_INIT; int status; - if (parse_signed_commit(commit->object.sha1, &payload, &signature) <= 0) + if (parse_signed_commit(commit, &payload, &signature) <= 0) goto out; status = verify_signed_buffer(payload.buf, payload.len, -- cgit v1.2.3