summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Champion <jchampion@postgresql.org>2025-08-08 08:44:56 -0700
committerJacob Champion <jchampion@postgresql.org>2025-08-08 08:44:56 -0700
commit3e311664e4974ea0eef708d54256aacbb0239c95 (patch)
tree0daf121fc121264131f6952233d8b674216eed2b
parent1749a12f0d2005ba23236089f0a32e6c6c1533f0 (diff)
oauth: Track total call count during a client flow
Tracking down the bugs that led to the addition of comb_multiplexer() and drain_timer_events() was difficult, because an inefficient flow is not visibly different from one that is working properly. To help maintainers notice when something has gone wrong, track the number of calls into the flow as part of debug mode, and print the total when the flow finishes. A new test makes sure the total count is less than 100. (We expect something on the order of 10.) This isn't foolproof, but it is able to catch several regressions in the logic of the prior two commits, and future work to add TLS support to the oauth_validator test server should strengthen it as well. Backpatch-through: 18 Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com
-rw-r--r--src/interfaces/libpq-oauth/oauth-curl.c22
-rw-r--r--src/test/modules/oauth_validator/t/001_server.pl31
2 files changed, 52 insertions, 1 deletions
diff --git a/src/interfaces/libpq-oauth/oauth-curl.c b/src/interfaces/libpq-oauth/oauth-curl.c
index aa5d2bfd96c..aa50b00d053 100644
--- a/src/interfaces/libpq-oauth/oauth-curl.c
+++ b/src/interfaces/libpq-oauth/oauth-curl.c
@@ -278,6 +278,7 @@ struct async_ctx
bool user_prompted; /* have we already sent the authz prompt? */
bool used_basic_auth; /* did we send a client secret? */
bool debugging; /* can we give unsafe developer assistance? */
+ int dbg_num_calls; /* (debug mode) how many times were we called? */
};
/*
@@ -3021,6 +3022,8 @@ PostgresPollingStatusType
pg_fe_run_oauth_flow(PGconn *conn)
{
PostgresPollingStatusType result;
+ fe_oauth_state *state = conn_sasl_state(conn);
+ struct async_ctx *actx;
#ifndef WIN32
sigset_t osigset;
bool sigpipe_pending;
@@ -3049,6 +3052,25 @@ pg_fe_run_oauth_flow(PGconn *conn)
result = pg_fe_run_oauth_flow_impl(conn);
+ /*
+ * To assist with finding bugs in comb_multiplexer() and
+ * drain_timer_events(), when we're in debug mode, track the total number
+ * of calls to this function and print that at the end of the flow.
+ *
+ * Be careful that state->async_ctx could be NULL if early initialization
+ * fails during the first call.
+ */
+ actx = state->async_ctx;
+ Assert(actx || result == PGRES_POLLING_FAILED);
+
+ if (actx && actx->debugging)
+ {
+ actx->dbg_num_calls++;
+ if (result == PGRES_POLLING_OK || result == PGRES_POLLING_FAILED)
+ fprintf(stderr, "[libpq] total number of polls: %d\n",
+ actx->dbg_num_calls);
+ }
+
#ifndef WIN32
if (masked)
{
diff --git a/src/test/modules/oauth_validator/t/001_server.pl b/src/test/modules/oauth_validator/t/001_server.pl
index 41672ebd5c6..c0dafb8be76 100644
--- a/src/test/modules/oauth_validator/t/001_server.pl
+++ b/src/test/modules/oauth_validator/t/001_server.pl
@@ -418,6 +418,35 @@ $node->connect_fails(
qr/failed to obtain access token: mutual TLS required for client \(invalid_client\)/
);
+# Count the number of calls to the internal flow when multiple retries are
+# triggered. The exact number depends on many things -- the TCP stack, the
+# version of Curl in use, random chance -- but a ridiculously high number
+# suggests something is wrong with our ability to clear multiplexer events after
+# they're no longer applicable.
+my ($ret, $stdout, $stderr) = $node->psql(
+ 'postgres',
+ "SELECT 'connected for call count'",
+ extra_params => ['-w'],
+ connstr => connstr(stage => 'token', retries => 2),
+ on_error_stop => 0);
+
+is($ret, 0, "call count connection succeeds");
+like(
+ $stderr,
+ qr@Visit https://example\.com/ and enter the code: postgresuser@,
+ "call count: stderr matches");
+
+my $count_pattern = qr/\[libpq\] total number of polls: (\d+)/;
+if (like($stderr, $count_pattern, "call count: count is printed"))
+{
+ # For reference, a typical flow with two retries might take between 5-15
+ # calls to the client implementation. And while this will probably continue
+ # to change across OSes and Curl updates, we're likely in trouble if we see
+ # hundreds or thousands of calls.
+ $stderr =~ $count_pattern;
+ cmp_ok($1, '<', 100, "call count is reasonably small");
+}
+
# Stress test: make sure our builtin flow operates correctly even if the client
# application isn't respecting PGRES_POLLING_READING/WRITING signals returned
# from PQconnectPoll().
@@ -428,7 +457,7 @@ my @cmd = (
connstr(stage => 'all', retries => 1, interval => 1));
note "running '" . join("' '", @cmd) . "'";
-my ($stdout, $stderr) = run_command(\@cmd);
+($stdout, $stderr) = run_command(\@cmd);
like($stdout, qr/connection succeeded/, "stress-async: stdout matches");
unlike(