diff options
| author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2025-12-03 19:15:08 +0200 |
|---|---|---|
| committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2025-12-03 19:15:08 +0200 |
| commit | 789d65364cdecd81e4bf822eec468ea3d34d28af (patch) | |
| tree | 7d6f11971b449ae5218c170a54f7de26382acbad /src/test | |
| parent | 9b05e2ec08a3d174accb2a9e1c59e52e94799acc (diff) | |
Set next multixid's offset when creating a new multixid
With this commit, the next multixid's offset is always set on the
offsets page by the time a backend might try to read it, so we no
longer need the waiting mechanism based on the condition variable. In
other words, this eliminates "corner case 2" described in the
comments in multixact.c.
The waiting mechanism was broken in a few scenarios:
- When nextMulti was advanced without WAL-logging the next
multixid. For example, this happened if a later multixid was assigned
and WAL-logged before the previous one was WAL-logged, and the
server then crashed. In that case the next offset would never be set
in the offsets SLRU, and a query trying to read it would get stuck
waiting for it. The same thing could happen if pg_resetwal was used
to forcibly advance nextMulti.
- In hot standby mode, a deadlock could happen where one backend waits
for the next multixid assignment record, but WAL replay is not
advancing because of a recovery conflict with the waiting backend.
The old TAP test used carefully placed injection points to exercise
the old waiting code, but now that the waiting code is gone, much of
the old test is no longer relevant. Rewrite the test to reproduce the
IPC/MultixactCreation hang after crash recovery instead, and to verify
that previously recorded multixids stay readable.
Backpatch to all supported versions. In back-branches, we still need
to be able to read WAL that was generated before this fix, so in the
back-branches this includes a hack to initialize the next offsets page
when replaying XLOG_MULTIXACT_CREATE_ID for the last multixid on a
page. On 'master', bump XLOG_PAGE_MAGIC instead to indicate that the
WAL is not compatible.
Author: Andrey Borodin <amborodin@acm.org>
Reviewed-by: Dmitry Yurichev <dsy.075@yandex.ru>
Reviewed-by: Álvaro Herrera <alvherre@kurilemu.de>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Ivan Bykov <i.bykov@modernsys.ru>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://www.postgresql.org/message-id/172e5723-d65f-4eec-b512-14beacb326ce@yandex.ru
Backpatch-through: 14
Diffstat (limited to 'src/test')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/test/modules/test_slru/t/001_multixact.pl | 116 |
| -rw-r--r-- | src/test/modules/test_slru/test_multixact.c | 5 |

2 files changed, 32 insertions, 89 deletions
diff --git a/src/test/modules/test_slru/t/001_multixact.pl b/src/test/modules/test_slru/t/001_multixact.pl index e2b567a603d..7837eb810f0 100644 --- a/src/test/modules/test_slru/t/001_multixact.pl +++ b/src/test/modules/test_slru/t/001_multixact.pl @@ -1,10 +1,6 @@ # Copyright (c) 2024-2025, PostgreSQL Global Development Group -# This test verifies edge case of reading a multixact: -# when we have multixact that is followed by exactly one another multixact, -# and another multixact have no offset yet, we must wait until this offset -# becomes observable. Previously we used to wait for 1ms in a loop in this -# case, but now we use CV for this. This test is exercising such a sleep. +# Test multixid corner cases. use strict; use warnings FATAL => 'all'; @@ -19,9 +15,7 @@ if ($ENV{enable_injection_points} ne 'yes') plan skip_all => 'Injection points not supported by this build'; } -my ($node, $result); - -$node = PostgreSQL::Test::Cluster->new('mike'); +my $node = PostgreSQL::Test::Cluster->new('main'); $node->init; $node->append_conf('postgresql.conf', "shared_preload_libraries = 'test_slru,injection_points'"); @@ -29,95 +23,47 @@ $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION injection_points)); $node->safe_psql('postgres', q(CREATE EXTENSION test_slru)); -# Test for Multixact generation edge case -$node->safe_psql('postgres', - q{select injection_points_attach('test-multixact-read','wait')}); -$node->safe_psql('postgres', - q{select injection_points_attach('multixact-get-members-cv-sleep','wait')} -); +# This test creates three multixacts. The middle one is never +# WAL-logged or recorded on the offsets page, because we pause the +# backend and crash the server before that. After restart, verify that +# the other multixacts are readable, despite the middle one being +# lost. -# This session must observe sleep on the condition variable while generating a -# multixact. To achieve this it first will create a multixact, then pause -# before reading it. 
-my $observer = $node->background_psql('postgres'); - -# This query will create a multixact, and hang just before reading it. -$observer->query_until( - qr/start/, - q{ - \echo start - SELECT test_read_multixact(test_create_multixact()); -}); -$node->wait_for_event('client backend', 'test-multixact-read'); - -# This session will create the next Multixact. This is necessary to avoid -# multixact.c's non-sleeping edge case 1. -my $creator = $node->background_psql('postgres'); +# Create the first multixact +my $bg_psql = $node->background_psql('postgres'); +my $multi1 = $bg_psql->query_safe(q(SELECT test_create_multixact();)); + +# Assign the middle multixact. Use an injection point to prevent it +# from being fully recorded. $node->safe_psql('postgres', q{SELECT injection_points_attach('multixact-create-from-members','wait');} ); -# We expect this query to hang in the critical section after generating new -# multixact, but before filling its offset into SLRU. -# Running an injection point inside a critical section requires it to be -# loaded beforehand. -$creator->query_until( - qr/start/, q{ - \echo start +$bg_psql->query_until( + qr/assigning lost multi/, q( +\echo assigning lost multi SELECT test_create_multixact(); -}); +)); $node->wait_for_event('client backend', 'multixact-create-from-members'); - -# Ensure we have the backends waiting that we expect -is( $node->safe_psql( - 'postgres', - q{SELECT string_agg(wait_event, ', ' ORDER BY wait_event) - FROM pg_stat_activity WHERE wait_event_type = 'InjectionPoint'} - ), - 'multixact-create-from-members, test-multixact-read', - "matching injection point waits"); - -# Now wake observer to get it to read the initial multixact. A subsequent -# multixact already exists, but that one doesn't have an offset assigned, so -# this will hit multixact.c's edge case 2. 
-$node->safe_psql('postgres', - q{SELECT injection_points_wakeup('test-multixact-read')}); -$node->wait_for_event('client backend', 'multixact-get-members-cv-sleep'); - -# Ensure we have the backends waiting that we expect -is( $node->safe_psql( - 'postgres', - q{SELECT string_agg(wait_event, ', ' ORDER BY wait_event) - FROM pg_stat_activity WHERE wait_event_type = 'InjectionPoint'} - ), - 'multixact-create-from-members, multixact-get-members-cv-sleep', - "matching injection point waits"); - -# Now we have two backends waiting in multixact-create-from-members and -# multixact-get-members-cv-sleep. Also we have 3 injections points set to wait. -# If we wakeup multixact-get-members-cv-sleep it will happen again, so we must -# detach it first. So let's detach all injection points, then wake up all -# backends. - -$node->safe_psql('postgres', - q{SELECT injection_points_detach('test-multixact-read')}); $node->safe_psql('postgres', q{SELECT injection_points_detach('multixact-create-from-members')}); -$node->safe_psql('postgres', - q{SELECT injection_points_detach('multixact-get-members-cv-sleep')}); -$node->safe_psql('postgres', - q{SELECT injection_points_wakeup('multixact-create-from-members')}); -$node->safe_psql('postgres', - q{SELECT injection_points_wakeup('multixact-get-members-cv-sleep')}); +# Create the third multixid +my $multi2 = $node->safe_psql('postgres', q{SELECT test_create_multixact();}); + +# All set and done, it's time for hard restart +$node->stop('immediate'); +$node->start; +$bg_psql->{run}->finish; -# Background psql will now be able to read the result and disconnect. 
-$observer->quit; -$creator->quit; +# Verify that the recorded multixids are readable +is( $node->safe_psql('postgres', qq{SELECT test_read_multixact('$multi1');}), + '', + 'first recorded multi is readable'); -$node->stop; +is( $node->safe_psql('postgres', qq{SELECT test_read_multixact('$multi2');}), + '', + 'second recorded multi is readable'); -# If we reached this point - everything is OK. -ok(1); done_testing(); diff --git a/src/test/modules/test_slru/test_multixact.c b/src/test/modules/test_slru/test_multixact.c index 6c9b0420717..8fb6c19d70f 100644 --- a/src/test/modules/test_slru/test_multixact.c +++ b/src/test/modules/test_slru/test_multixact.c @@ -17,7 +17,6 @@ #include "access/multixact.h" #include "access/xact.h" #include "fmgr.h" -#include "utils/injection_point.h" PG_FUNCTION_INFO_V1(test_create_multixact); PG_FUNCTION_INFO_V1(test_read_multixact); @@ -37,8 +36,7 @@ test_create_multixact(PG_FUNCTION_ARGS) } /* - * Reads given multixact after running an injection point. Discards local cache - * to make a real read. Tailored for multixact testing. + * Reads given multixact. Discards local cache to make a real read. */ Datum test_read_multixact(PG_FUNCTION_ARGS) @@ -46,7 +44,6 @@ test_read_multixact(PG_FUNCTION_ARGS) MultiXactId id = PG_GETARG_TRANSACTIONID(0); MultiXactMember *members; - INJECTION_POINT("test-multixact-read", NULL); /* discard caches */ AtEOXact_MultiXact(); |
