summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2024-04-07 20:33:45 +0200
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2024-04-07 20:33:45 +0200
commita0e0fb1ba56ff4323542c75ce997a41d31ad28a0 (patch)
treec6510264b6ca42f68a53bab76da0d0ab0adab52d
parent473411fc51157e8e825ee865c2822f976e0da5e3 (diff)
Use condition variable to wait for next MultiXact offset
In one multixact.c edge case, we need a mechanism to wait for one multixact offset to be written before being allowed to read the next one. We used to handle this case by sleeping for one millisecond and retrying, but such sleeps have been reported as problematic in production cases. We can avoid the problem by using a condition variable: readers sleep on it and then every creator of multixacts broadcasts into the CV when creation is sufficiently far along.

Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-by: Andrey Borodin <amborodin@acm.org>
Discussion: https://postgr.es/m/47A598F4-B4E7-4029-8FEC-A06A6C3CB4B5@yandex-team.ru
Discussion: https://postgr.es/m/20200515.090333.24867479329066911.horikyota.ntt@gmail.com
-rw-r--r--src/backend/access/transam/multixact.c30
-rw-r--r--src/backend/utils/activity/wait_event_names.txt1
2 files changed, 29 insertions, 2 deletions
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 83b578dced7..380c866d714 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -82,6 +82,7 @@
#include "lib/ilist.h"
#include "miscadmin.h"
#include "pg_trace.h"
+#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
@@ -233,6 +234,12 @@ typedef struct MultiXactStateData
MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
/*
+ * Readers that find the next multixact's offset not yet written sleep on
+ * this; creators broadcast on it once they have written their offset.
+ */
+ ConditionVariable nextoff_cv;
+
+ /*
* Per-backend data starts here. We have two arrays stored in the area
* immediately following the MultiXactStateData struct. Each is indexed by
* ProcNumber.
@@ -895,6 +902,12 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
/* Release MultiXactOffset SLRU lock. */
LWLockRelease(lock);
+ /*
+ * If anybody was waiting to know the offset of this multixact ID we just
+ * wrote, they can read it now, so wake them up.
+ */
+ ConditionVariableBroadcast(&MultiXactState->nextoff_cv);
+
prev_pageno = -1;
for (i = 0; i < nmembers; i++, offset++)
@@ -1253,6 +1266,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
MultiXactOffset nextOffset;
MultiXactMember *ptr;
LWLock *lock;
+ bool slept = false;
debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
@@ -1340,7 +1354,9 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
* (because we are careful to pre-zero offset pages). Because
* GetNewMultiXactId will never return zero as the starting offset for a
* multixact, when we read zero as the next multixact's offset, we know we
- * have this case. We sleep for a bit and try again.
+ * have this case. We handle this by sleeping on a condition variable
+ * dedicated to this purpose; the process creating that multixact broadcasts
+ * on the CV as soon as it has finished writing the multixact offset.
*
* 3. Because GetNewMultiXactId increments offset zero to offset one to
* handle case #2, there is an ambiguity near the point of offset
@@ -1422,7 +1438,10 @@ retry:
/* Corner case 2: next multixact is still being filled in */
LWLockRelease(lock);
CHECK_FOR_INTERRUPTS();
- pg_usleep(1000L);
+
+ ConditionVariableSleep(&MultiXactState->nextoff_cv,
+ WAIT_EVENT_MULTIXACT_CREATION);
+ slept = true;
goto retry;
}
@@ -1432,6 +1451,12 @@ retry:
LWLockRelease(lock);
lock = NULL;
+ /*
+ * If we slept above, clean up state; it's no longer needed.
+ */
+ if (slept)
+ ConditionVariableCancelSleep();
+
ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
truelength = 0;
@@ -1921,6 +1946,7 @@ MultiXactShmemInit(void)
/* Make sure we zero out the per-backend state */
MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);
+ ConditionVariableInit(&MultiXactState->nextoff_cv);
}
else
Assert(found);
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 5f2fa814c8e..f079d660a46 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -139,6 +139,7 @@ MESSAGE_QUEUE_INTERNAL "Waiting for another process to be attached to a shared m
MESSAGE_QUEUE_PUT_MESSAGE "Waiting to write a protocol message to a shared message queue."
MESSAGE_QUEUE_RECEIVE "Waiting to receive bytes from a shared message queue."
MESSAGE_QUEUE_SEND "Waiting to send bytes to a shared message queue."
+MULTIXACT_CREATION "Waiting for a multixact creation to complete."
PARALLEL_BITMAP_SCAN "Waiting for parallel bitmap scan to become initialized."
PARALLEL_CREATE_INDEX_SCAN "Waiting for parallel <command>CREATE INDEX</command> workers to finish heap scan."
PARALLEL_FINISH "Waiting for parallel workers to finish computing."