author     Jacob Champion <jchampion@postgresql.org>   2025-10-01 09:48:57 -0700
committer  Jacob Champion <jchampion@postgresql.org>   2025-10-01 09:48:57 -0700
commit     fd726b8379a8191da9e74d28761c967eda13f5f2 (patch)
tree       86ea0381cd4c29f0a7a962f57a01a0d693bb5823 /src/test/modules/test_json_parser
parent     3e908fb54ff8fa857180bc212ca79c7bd95dcc2c (diff)
test_json_parser: Speed up 002_inline.pl
Some macOS machines are having trouble with 002_inline, which executes
the JSON parser test executables hundreds of times in a nested loop.
Both developer machines and buildfarm critters have shown excessive test
durations, upwards of 20 seconds.
Push the innermost loop of 002_inline, which iterates through differing
chunk sizes, down into the test executable. (I'd eventually like to push
all of the JSON unit tests down into C, but this is an easy win in the
short term.) Testers have reported speedups between 4x and 9x.
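For illustration, the harness previously launched one test process per chunk
size, while it now makes a single "-r" invocation and splits the
NUL-separated results; a simplified sketch of the two patterns (not the
literal test code):

    # Before: one test_json_parser_incremental process per chunk size.
    foreach my $size (reverse(1 .. $chunk))
    {
        my ($stdout, $stderr) = run_command([ @exe, "-c", $size, $fname ]);
    }

    # After: one process covers every chunk size from $chunk down to 1,
    # and the outputs are split apart on the NUL separators.
    my ($all_stdout, $all_stderr) = run_command([ @exe, "-r", $chunk, $fname ]);
    my @stdout = unpack("(Z*)*", $all_stdout);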
Reported-by: Robert Haas <robertmhaas@gmail.com>
Suggested-by: Andres Freund <andres@anarazel.de>
Tested-by: Andrew Dunstan <andrew@dunslane.net>
Tested-by: Tom Lane <tgl@sss.pgh.pa.us>
Tested-by: Robert Haas <robertmhaas@gmail.com>
Discussion: https://postgr.es/m/CA%2BTgmobKoG%2BgKzH9qB7uE4MFo-z1hn7UngqAe9b0UqNbn3_XGQ%40mail.gmail.com
Backpatch-through: 17
Diffstat (limited to 'src/test/modules/test_json_parser')
-rw-r--r--  src/test/modules/test_json_parser/README                           |  10
-rw-r--r--  src/test/modules/test_json_parser/t/002_inline.pl                  |  26
-rw-r--r--  src/test/modules/test_json_parser/test_json_parser_incremental.c   | 127
3 files changed, 106 insertions(+), 57 deletions(-)
diff --git a/src/test/modules/test_json_parser/README b/src/test/modules/test_json_parser/README
index ceccd499f43..61e7c78d588 100644
--- a/src/test/modules/test_json_parser/README
+++ b/src/test/modules/test_json_parser/README
@@ -6,10 +6,12 @@ This module contains two programs for testing the json parsers.
 - `test_json_parser_incremental` is for testing the incremental parser, It
   reads in a file and passes it in very small chunks (default is 60 bytes at a
   time) to the incremental parser. It's not meant to be a speed test but to
-  test the accuracy of the incremental parser. There are two option arguments,
-  "-c nn" specifies an alternative chunk size, and "-s" specifies using
-  semantic routines. The semantic routines re-output the json, although not in
-  a very pretty form. The required non-option argument is the input file name.
+  test the accuracy of the incremental parser. The option "-c nn" specifies an
+  alternative chunk size, "-r nn" runs a range of chunk sizes down to one byte
+  on the same input (with output separated by null bytes), and "-s" specifies
+  using semantic routines. The semantic routines re-output the json, although
+  not in a very pretty form. The required non-option argument is the input file
+  name.
 
 - `test_json_parser_perf` is for speed testing both the standard recursive
   descent parser and the non-recursive incremental parser. If given the `-i`
   flag it uses the non-recursive parser,
diff --git a/src/test/modules/test_json_parser/t/002_inline.pl b/src/test/modules/test_json_parser/t/002_inline.pl
index 7c8b64977ec..3dd16731e90 100644
--- a/src/test/modules/test_json_parser/t/002_inline.pl
+++ b/src/test/modules/test_json_parser/t/002_inline.pl
@@ -33,23 +33,37 @@ sub test
     print $fh "$json";
     close($fh);
 
+    # The -r mode runs the parser in a loop, with output separated by nulls.
+    # Unpack that as a list of null-terminated ASCII strings (Z*) and check
+    # that each run produces the same result.
+    my ($all_stdout, $all_stderr) =
+      run_command([ @exe, "-r", $chunk, $fname ]);
+
+    my @stdout = unpack("(Z*)*", $all_stdout);
+    my @stderr = unpack("(Z*)*", $all_stderr);
+
+    is(scalar @stdout, $chunk, "$name: stdout has correct number of entries");
+    is(scalar @stderr, $chunk, "$name: stderr has correct number of entries");
+
+    my $i = 0;
+
     foreach my $size (reverse(1 .. $chunk))
     {
-        my ($stdout, $stderr) = run_command([ @exe, "-c", $size, $fname ]);
-
         if (defined($params{error}))
         {
-            unlike($stdout, qr/SUCCESS/,
+            unlike($stdout[$i], qr/SUCCESS/,
                 "$name, chunk size $size: test fails");
-            like($stderr, $params{error},
+            like($stderr[$i], $params{error},
                 "$name, chunk size $size: correct error output");
         }
         else
         {
-            like($stdout, qr/SUCCESS/,
+            like($stdout[$i], qr/SUCCESS/,
                 "$name, chunk size $size: test succeeds");
-            is($stderr, "", "$name, chunk size $size: no error output");
+            is($stderr[$i], "", "$name, chunk size $size: no error output");
         }
+
+        $i++;
     }
 }
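An aside on the harness change above: the "(Z*)*" template given to unpack
consumes repeated NUL-terminated (ASCIIZ) strings, which is what turns the
concatenated "-r" output back into one entry per run. A self-contained
sketch, using made-up output data:

    use strict;
    use warnings;

    # Stream as the -r binary would produce it for three passing runs
    # (hypothetical data): each run's stdout is followed by a NUL byte.
    my $all_stdout = "SUCCESS!\n\0SUCCESS!\n\0SUCCESS!\n\0";

    # (Z*)* consumes repeated NUL-terminated strings into a list.
    my @stdout = unpack("(Z*)*", $all_stdout);

    print scalar(@stdout), "\n";    # 3 -- one entry per run
    print $stdout[0];               # prints "SUCCESS!" plus its newline

If a run died before printing its trailing NUL, the unpacked list would come
up short, which is exactly what the is(scalar @stdout, $chunk, ...) checks in
the test are there to catch.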
+ * * If the "-c SIZE" option is provided, that chunk size is used instead * of the default of 60. * + * If the "-r SIZE" option is provided, a range of chunk sizes from SIZE down to + * 1 are run sequentially. A null byte is printed to the streams after each + * iteration. + * * If the -s flag is given, the program does semantic processing. This should * just mirror back the json, albeit with white space changes. * @@ -88,8 +93,8 @@ main(int argc, char **argv) StringInfoData json; int n_read; size_t chunk_size = DEFAULT_CHUNK_SIZE; + bool run_chunk_ranges = false; struct stat statbuf; - off_t bytes_left; const JsonSemAction *testsem = &nullSemAction; char *testfile; int c; @@ -102,11 +107,14 @@ main(int argc, char **argv) if (!lex) pg_fatal("out of memory"); - while ((c = getopt(argc, argv, "c:os")) != -1) + while ((c = getopt(argc, argv, "r:c:os")) != -1) { switch (c) { - case 'c': /* chunksize */ + case 'r': /* chunk range */ + run_chunk_ranges = true; + /* fall through */ + case 'c': /* chunk size */ chunk_size = strtou64(optarg, NULL, 10); if (chunk_size > BUFSIZE) pg_fatal("chunk size cannot exceed %d", BUFSIZE); @@ -135,8 +143,6 @@ main(int argc, char **argv) exit(1); } - makeJsonLexContextIncremental(lex, PG_UTF8, need_strings); - setJsonLexContextOwnsTokens(lex, lex_owns_tokens); initStringInfo(&json); if ((json_file = fopen(testfile, PG_BINARY_R)) == NULL) @@ -145,61 +151,88 @@ main(int argc, char **argv) if (fstat(fileno(json_file), &statbuf) != 0) pg_fatal("error statting input: %m"); - bytes_left = statbuf.st_size; - - for (;;) + do { - /* We will break when there's nothing left to read */ - - if (bytes_left < chunk_size) - chunk_size = bytes_left; + /* + * This outer loop only repeats in -r mode. Reset the parse state and + * our position in the input file for the inner loop, which performs + * the incremental parsing. + */ + off_t bytes_left = statbuf.st_size; + size_t to_read = chunk_size; - n_read = fread(buff, 1, chunk_size, json_file); - if (n_read < chunk_size) - pg_fatal("error reading input file: %d", ferror(json_file)); + makeJsonLexContextIncremental(lex, PG_UTF8, need_strings); + setJsonLexContextOwnsTokens(lex, lex_owns_tokens); - appendBinaryStringInfo(&json, buff, n_read); + rewind(json_file); + resetStringInfo(&json); - /* - * Append some trailing junk to the buffer passed to the parser. This - * helps us ensure that the parser does the right thing even if the - * chunk isn't terminated with a '\0'. - */ - appendStringInfoString(&json, "1+23 trailing junk"); - bytes_left -= n_read; - if (bytes_left > 0) + for (;;) { - result = pg_parse_json_incremental(lex, testsem, - json.data, n_read, - false); - if (result != JSON_INCOMPLETE) + /* We will break when there's nothing left to read */ + + if (bytes_left < to_read) + to_read = bytes_left; + + n_read = fread(buff, 1, to_read, json_file); + if (n_read < to_read) + pg_fatal("error reading input file: %d", ferror(json_file)); + + appendBinaryStringInfo(&json, buff, n_read); + + /* + * Append some trailing junk to the buffer passed to the parser. + * This helps us ensure that the parser does the right thing even + * if the chunk isn't terminated with a '\0'. 
+ */ + appendStringInfoString(&json, "1+23 trailing junk"); + bytes_left -= n_read; + if (bytes_left > 0) { - fprintf(stderr, "%s\n", json_errdetail(result, lex)); - ret = 1; - goto cleanup; + result = pg_parse_json_incremental(lex, testsem, + json.data, n_read, + false); + if (result != JSON_INCOMPLETE) + { + fprintf(stderr, "%s\n", json_errdetail(result, lex)); + ret = 1; + goto cleanup; + } + resetStringInfo(&json); } - resetStringInfo(&json); - } - else - { - result = pg_parse_json_incremental(lex, testsem, - json.data, n_read, - true); - if (result != JSON_SUCCESS) + else { - fprintf(stderr, "%s\n", json_errdetail(result, lex)); - ret = 1; - goto cleanup; + result = pg_parse_json_incremental(lex, testsem, + json.data, n_read, + true); + if (result != JSON_SUCCESS) + { + fprintf(stderr, "%s\n", json_errdetail(result, lex)); + ret = 1; + goto cleanup; + } + if (!need_strings) + printf("SUCCESS!\n"); + break; } - if (!need_strings) - printf("SUCCESS!\n"); - break; } - } cleanup: + freeJsonLexContext(lex); + + /* + * In -r mode, separate output with nulls so that the calling test can + * split it up, decrement the chunk size, and loop back to the top. + * All other modes immediately fall out of the loop and exit. + */ + if (run_chunk_ranges) + { + fputc('\0', stdout); + fputc('\0', stderr); + } + } while (run_chunk_ranges && (--chunk_size > 0)); + fclose(json_file); - freeJsonLexContext(lex); free(json.data); free(lex); |
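To make the new control flow concrete, here is the producer side of the same
protocol emulated in Perl (a hypothetical stand-in for the C binary's -r
mode; run_chunk_range is an invented helper, not part of the commit):

    use strict;
    use warnings;

    # Emulates -r SIZE: run chunk sizes SIZE down to 1 in one process,
    # terminating each iteration's output with a NUL, as the C code's
    # do/while loop does with fputc('\0', stdout).
    sub run_chunk_range
    {
        my ($chunk_size, $parse_at_size) = @_;
        my $stream = '';

        do
        {
            $stream .= $parse_at_size->($chunk_size);
            $stream .= "\0";    # one NUL per iteration
        } while (--$chunk_size > 0);

        return $stream;
    }

    my $stream  = run_chunk_range(3, sub { "SUCCESS!\n" });
    my @results = unpack("(Z*)*", $stream);
    print scalar(@results), " runs\n";    # 3 runs

The do/while mirrors the C structure: the body always executes once (plain -c
mode), and only -r mode decrements the chunk size and loops back for another
pass over the same input.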