From 36bf19589055fb71aac0ed6719dfe5b385adc2bf Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 24 Feb 2023 00:09:24 +0000 Subject: alloc.h: move ALLOC_GROW() functions from cache.h This allows us to replace includes of cache.h with includes of the much smaller alloc.h in many places. It does mean that we also need to add includes of alloc.h in a number of C files. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- string-list.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'string-list.c') diff --git a/string-list.c b/string-list.c index 42bacaec55..db473f273e 100644 --- a/string-list.c +++ b/string-list.c @@ -1,5 +1,6 @@ -#include "cache.h" +#include "git-compat-util.h" #include "string-list.h" +#include "alloc.h" void string_list_init_nodup(struct string_list *list) { -- cgit v1.2.3 From 52acddf36c8cb3778ab2098a0d95cc2e375a4069 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 24 Apr 2023 18:20:10 -0400 Subject: string-list: multi-delimiter `string_list_split_in_place()` Enhance `string_list_split_in_place()` to accept multiple characters as delimiters instead of a single character. Instead of using `strchr(2)` to locate the first occurrence of the given delimiter character, `string_list_split_in_place_multi()` uses `strcspn(2)` to move past the initial segment of characters comprised of any characters in the delimiting set. When only a single delimiting character is provided, `strpbrk(2)` (which is implemented with `strcspn(2)`) has equivalent performance to `strchr(2)`. Modern `strcspn(2)` implementations treat an empty delimiter or the singleton delimiter as a special case and fall back to calling strchrnul(). Both glibc[1] and musl[2] implement `strcspn(2)` this way. This change is one step to removing `strtok(2)` from the tree. Note that `string_list_split_in_place()` is not a strict replacement for `strtok()`, since it will happily turn sequential delimiter characters into empty entries in the resulting string_list. For example: string_list_split_in_place(&xs, "foo:;:bar:;:baz", ":;", -1) would yield a string list of: ["foo", "", "", "bar", "", "", "baz"] Callers that wish to emulate the behavior of strtok(2) more directly should call `string_list_remove_empty_items()` after splitting. To avoid regressions for the new multi-character delimter cases, update t0063 in this patch as well. [1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/strcspn.c;hb=glibc-2.37#l35 [2]: https://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c?h=v1.2.3#n11 Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/gc.c | 4 ++-- diff.c | 2 +- notes.c | 2 +- refs/packed-backend.c | 2 +- string-list.c | 4 ++-- string-list.h | 2 +- t/helper/test-string-list.c | 4 ++-- t/t0063-string-list.sh | 51 +++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 61 insertions(+), 10 deletions(-) (limited to 'string-list.c') diff --git a/builtin/gc.c b/builtin/gc.c index edd98d35a5..f68e976704 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1687,11 +1687,11 @@ static int get_schedule_cmd(const char **cmd, int *is_available) if (is_available) *is_available = 0; - string_list_split_in_place(&list, testing, ',', -1); + string_list_split_in_place(&list, testing, ",", -1); for_each_string_list_item(item, &list) { struct string_list pair = STRING_LIST_INIT_NODUP; - if (string_list_split_in_place(&pair, item->string, ':', 2) != 2) + if (string_list_split_in_place(&pair, item->string, ":", 2) != 2) continue; if (!strcmp(*cmd, pair.items[0].string)) { diff --git a/diff.c b/diff.c index 78b0fdd8ca..378a0248e1 100644 --- a/diff.c +++ b/diff.c @@ -134,7 +134,7 @@ static int parse_dirstat_params(struct diff_options *options, const char *params int i; if (*params_copy) - string_list_split_in_place(¶ms, params_copy, ',', -1); + string_list_split_in_place(¶ms, params_copy, ",", -1); for (i = 0; i < params.nr; i++) { const char *p = params.items[i].string; if (!strcmp(p, "changes")) { diff --git a/notes.c b/notes.c index 45fb7f22d1..eee806f626 100644 --- a/notes.c +++ b/notes.c @@ -963,7 +963,7 @@ void string_list_add_refs_from_colon_sep(struct string_list *list, char *globs_copy = xstrdup(globs); int i; - string_list_split_in_place(&split, globs_copy, ':', -1); + string_list_split_in_place(&split, globs_copy, ":", -1); string_list_remove_empty_items(&split, 0); for (i = 0; i < split.nr; i++) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 1eba1015dd..cc903baa7e 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -650,7 +650,7 @@ static struct snapshot *create_snapshot(struct packed_ref_store *refs) snapshot->buf, snapshot->eof - snapshot->buf); - string_list_split_in_place(&traits, p, ' ', -1); + string_list_split_in_place(&traits, p, " ", -1); if (unsorted_string_list_has_string(&traits, "fully-peeled")) snapshot->peeled = PEELED_FULLY; diff --git a/string-list.c b/string-list.c index db473f273e..5f5b60fe1c 100644 --- a/string-list.c +++ b/string-list.c @@ -301,7 +301,7 @@ int string_list_split(struct string_list *list, const char *string, } int string_list_split_in_place(struct string_list *list, char *string, - int delim, int maxsplit) + const char *delim, int maxsplit) { int count = 0; char *p = string, *end; @@ -315,7 +315,7 @@ int string_list_split_in_place(struct string_list *list, char *string, string_list_append(list, p); return count; } - end = strchr(p, delim); + end = strpbrk(p, delim); if (end) { *end = '\0'; string_list_append(list, p); diff --git a/string-list.h b/string-list.h index c7b0d5d000..77854840f7 100644 --- a/string-list.h +++ b/string-list.h @@ -270,5 +270,5 @@ int string_list_split(struct string_list *list, const char *string, * list->strdup_strings must *not* be set. */ int string_list_split_in_place(struct string_list *list, char *string, - int delim, int maxsplit); + const char *delim, int maxsplit); #endif /* STRING_LIST_H */ diff --git a/t/helper/test-string-list.c b/t/helper/test-string-list.c index 2123dda85b..63df88575c 100644 --- a/t/helper/test-string-list.c +++ b/t/helper/test-string-list.c @@ -62,7 +62,7 @@ int cmd__string_list(int argc, const char **argv) struct string_list list = STRING_LIST_INIT_NODUP; int i; char *s = xstrdup(argv[2]); - int delim = *argv[3]; + const char *delim = argv[3]; int maxsplit = atoi(argv[4]); i = string_list_split_in_place(&list, s, delim, maxsplit); @@ -111,7 +111,7 @@ int cmd__string_list(int argc, const char **argv) */ if (sb.len && sb.buf[sb.len - 1] == '\n') strbuf_setlen(&sb, sb.len - 1); - string_list_split_in_place(&list, sb.buf, '\n', -1); + string_list_split_in_place(&list, sb.buf, "\n", -1); string_list_sort(&list); diff --git a/t/t0063-string-list.sh b/t/t0063-string-list.sh index 46d4839194..1fee6d9010 100755 --- a/t/t0063-string-list.sh +++ b/t/t0063-string-list.sh @@ -18,6 +18,14 @@ test_split () { " } +test_split_in_place() { + cat >expected && + test_expect_success "split (in place) $1 at $2, max $3" " + test-tool string-list split_in_place '$1' '$2' '$3' >actual && + test_cmp expected actual + " +} + test_split "foo:bar:baz" ":" "-1" < Date: Mon, 24 Apr 2023 18:20:14 -0400 Subject: string-list: introduce `string_list_setlen()` It is sometimes useful to reduce the size of a `string_list`'s list of items without having to re-allocate them. For example, doing the following: struct strbuf buf = STRBUF_INIT; struct string_list parts = STRING_LIST_INIT_NO_DUP; while (strbuf_getline(&buf, stdin) != EOF) { parts.nr = 0; string_list_split_in_place(&parts, buf.buf, ":", -1); /* ... */ } string_list_clear(&parts, 0); is preferable over calling `string_list_clear()` on every iteration of the loop. This is because `string_list_clear()` causes us free our existing `items` array. This means that every time we call `string_list_split_in_place()`, the string-list internals re-allocate the same size array. Since in the above example we do not care about the individual parts after processing each line, it is much more efficient to pretend that there aren't any elements in the `string_list` by setting `list->nr` to 0 while leaving the list of elements allocated as-is. This allows `string_list_split_in_place()` to overwrite any existing entries without needing to free and re-allocate them. However, setting `list->nr` manually is not safe in all instances. There are a couple of cases worth worrying about: - If the `string_list` is initialized with `strdup_strings`, truncating the list can lead to overwriting strings which are allocated elsewhere. If there aren't any other pointers to those strings other than the ones inside of the `items` array, they will become unreachable and leak. (We could ourselves free the truncated items between string_list->items[nr] and `list->nr`, but no present or future callers would benefit from this additional complexity). - If the given `nr` is larger than the current value of `list->nr`, we'll trick the `string_list` into a state where it thinks there are more items allocated than there actually are, which can lead to undefined behavior if we try to read or write those entries. Guard against both of these by introducing a helper function which guards assignment of `list->nr` against each of the above conditions. Co-authored-by: Jeff King Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- string-list.c | 9 +++++++++ string-list.h | 10 ++++++++++ 2 files changed, 19 insertions(+) (limited to 'string-list.c') diff --git a/string-list.c b/string-list.c index 5f5b60fe1c..0f8ac117fd 100644 --- a/string-list.c +++ b/string-list.c @@ -203,6 +203,15 @@ void string_list_clear_func(struct string_list *list, string_list_clear_func_t c list->nr = list->alloc = 0; } +void string_list_setlen(struct string_list *list, size_t nr) +{ + if (list->strdup_strings) + BUG("cannot setlen a string_list which owns its entries"); + if (nr > list->nr) + BUG("cannot grow a string_list with setlen"); + list->nr = nr; +} + struct string_list_item *string_list_append_nodup(struct string_list *list, char *string) { diff --git a/string-list.h b/string-list.h index 77854840f7..122b318641 100644 --- a/string-list.h +++ b/string-list.h @@ -134,6 +134,16 @@ typedef void (*string_list_clear_func_t)(void *p, const char *str); /** Call a custom clear function on each util pointer */ void string_list_clear_func(struct string_list *list, string_list_clear_func_t clearfunc); +/* + * Set the length of a string_list to `nr`, provided that (a) `list` + * does not own its own storage, and (b) that `nr` is no larger than + * `list->nr`. + * + * Useful when "shrinking" `list` to write over existing entries that + * are no longer used without reallocating. + */ +void string_list_setlen(struct string_list *list, size_t nr); + /** * Apply `func` to each item. If `func` returns nonzero, the * iteration aborts and the return value is propagated. -- cgit v1.2.3