diff options
author | Jacob Champion <jchampion@postgresql.org> | 2025-08-08 08:44:56 -0700 |
---|---|---|
committer | Jacob Champion <jchampion@postgresql.org> | 2025-08-08 08:44:56 -0700 |
commit | 3e311664e4974ea0eef708d54256aacbb0239c95 (patch) | |
tree | 0daf121fc121264131f6952233d8b674216eed2b | |
parent | 1749a12f0d2005ba23236089f0a32e6c6c1533f0 (diff) |
oauth: Track total call count during a client flow
Tracking down the bugs that led to the addition of comb_multiplexer()
and drain_timer_events() was difficult, because an inefficient flow is
not visibly different from one that is working properly. To help
maintainers notice when something has gone wrong, track the number of
calls into the flow as part of debug mode, and print the total when the
flow finishes.
A new test makes sure the total count is less than 100. (We expect
something on the order of 10.) This isn't foolproof, but it is able to
catch several regressions in the logic of the prior two commits, and
future work to add TLS support to the oauth_validator test server should
strengthen it as well.
Backpatch-through: 18
Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com
-rw-r--r-- | src/interfaces/libpq-oauth/oauth-curl.c | 22 | ||||
-rw-r--r-- | src/test/modules/oauth_validator/t/001_server.pl | 31 |
2 files changed, 52 insertions, 1 deletions
diff --git a/src/interfaces/libpq-oauth/oauth-curl.c b/src/interfaces/libpq-oauth/oauth-curl.c index aa5d2bfd96c..aa50b00d053 100644 --- a/src/interfaces/libpq-oauth/oauth-curl.c +++ b/src/interfaces/libpq-oauth/oauth-curl.c @@ -278,6 +278,7 @@ struct async_ctx bool user_prompted; /* have we already sent the authz prompt? */ bool used_basic_auth; /* did we send a client secret? */ bool debugging; /* can we give unsafe developer assistance? */ + int dbg_num_calls; /* (debug mode) how many times were we called? */ }; /* @@ -3021,6 +3022,8 @@ PostgresPollingStatusType pg_fe_run_oauth_flow(PGconn *conn) { PostgresPollingStatusType result; + fe_oauth_state *state = conn_sasl_state(conn); + struct async_ctx *actx; #ifndef WIN32 sigset_t osigset; bool sigpipe_pending; @@ -3049,6 +3052,25 @@ pg_fe_run_oauth_flow(PGconn *conn) result = pg_fe_run_oauth_flow_impl(conn); + /* + * To assist with finding bugs in comb_multiplexer() and + * drain_timer_events(), when we're in debug mode, track the total number + * of calls to this function and print that at the end of the flow. + * + * Be careful that state->async_ctx could be NULL if early initialization + * fails during the first call. + */ + actx = state->async_ctx; + Assert(actx || result == PGRES_POLLING_FAILED); + + if (actx && actx->debugging) + { + actx->dbg_num_calls++; + if (result == PGRES_POLLING_OK || result == PGRES_POLLING_FAILED) + fprintf(stderr, "[libpq] total number of polls: %d\n", + actx->dbg_num_calls); + } + #ifndef WIN32 if (masked) { diff --git a/src/test/modules/oauth_validator/t/001_server.pl b/src/test/modules/oauth_validator/t/001_server.pl index 41672ebd5c6..c0dafb8be76 100644 --- a/src/test/modules/oauth_validator/t/001_server.pl +++ b/src/test/modules/oauth_validator/t/001_server.pl @@ -418,6 +418,35 @@ $node->connect_fails( qr/failed to obtain access token: mutual TLS required for client \(invalid_client\)/ ); +# Count the number of calls to the internal flow when multiple retries are +# triggered. The exact number depends on many things -- the TCP stack, the +# version of Curl in use, random chance -- but a ridiculously high number +# suggests something is wrong with our ability to clear multiplexer events after +# they're no longer applicable. +my ($ret, $stdout, $stderr) = $node->psql( + 'postgres', + "SELECT 'connected for call count'", + extra_params => ['-w'], + connstr => connstr(stage => 'token', retries => 2), + on_error_stop => 0); + +is($ret, 0, "call count connection succeeds"); +like( + $stderr, + qr@Visit https://example\.com/ and enter the code: postgresuser@, + "call count: stderr matches"); + +my $count_pattern = qr/\[libpq\] total number of polls: (\d+)/; +if (like($stderr, $count_pattern, "call count: count is printed")) +{ + # For reference, a typical flow with two retries might take between 5-15 + # calls to the client implementation. And while this will probably continue + # to change across OSes and Curl updates, we're likely in trouble if we see + # hundreds or thousands of calls. + $stderr =~ $count_pattern; + cmp_ok($1, '<', 100, "call count is reasonably small"); +} + # Stress test: make sure our builtin flow operates correctly even if the client # application isn't respecting PGRES_POLLING_READING/WRITING signals returned # from PQconnectPoll(). @@ -428,7 +457,7 @@ my @cmd = ( connstr(stage => 'all', retries => 1, interval => 1)); note "running '" . join("' '", @cmd) . "'"; -my ($stdout, $stderr) = run_command(\@cmd); +($stdout, $stderr) = run_command(\@cmd); like($stdout, qr/connection succeeded/, "stress-async: stdout matches"); unlike( |