summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAmit Kapila <akapila@postgresql.org>2025-09-24 04:11:53 +0000
committerAmit Kapila <akapila@postgresql.org>2025-09-24 04:11:53 +0000
commite41d954da6aa04f0a4453e566d3bcc064512d457 (patch)
tree3e364b55d07b6591cdda74cd028e2f2d4cefd49a /src
parentf83fe65f3fc1cae177f6c75f7b38eb951dd217b5 (diff)
Fix LOCK_TIMEOUT handling during parallel apply.
Previously, the parallel apply worker used SIGINT to receive a graceful shutdown signal from the leader apply worker. However, SIGINT is also used by the LOCK_TIMEOUT handler to trigger a query-cancel interrupt. This overlap caused the parallel apply worker to miss LOCK_TIMEOUT signals, leading to incorrect behavior during lock wait/contention. This patch resolves the conflict by switching the graceful shutdown signal from SIGINT to SIGUSR2. Reported-by: Zane Duffield <duffieldzane@gmail.com> Diagnosed-by: Zhijie Hou <houzj.fnst@fujitsu.com> Author: Hayato Kuroda <kuroda.hayato@fujitsu.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Backpatch-through: 16, where it was introduced Discussion: https://postgr.es/m/CACMiCkXyC4au74kvE2g6Y=mCEF8X6r-Ne_ty4r7qWkUjRE4+oQ@mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/postmaster/interrupt.c5
-rw-r--r--src/backend/replication/logical/applyparallelworker.c17
-rw-r--r--src/backend/replication/logical/launcher.c4
3 files changed, 16 insertions, 10 deletions
diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c
index 0ae9bf906ec..ba63b84dfc5 100644
--- a/src/backend/postmaster/interrupt.c
+++ b/src/backend/postmaster/interrupt.c
@@ -94,9 +94,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS)
* shut down and exit.
*
* Typically, this handler would be used for SIGTERM, but some processes use
- * other signals. In particular, the checkpointer exits on SIGUSR2, and the WAL
- * writer and the logical replication parallel apply worker exits on either
- * SIGINT or SIGTERM.
+ * other signals. In particular, the checkpointer and parallel apply worker
+ * exit on SIGUSR2, and the WAL writer exits on either SIGINT or SIGTERM.
*
* ShutdownRequestPending should be checked at a convenient place within the
* main loop, or else the main loop should call ProcessMainLoopInterrupts.
diff --git a/src/backend/replication/logical/applyparallelworker.c b/src/backend/replication/logical/applyparallelworker.c
index 31a92d1a24a..33b7ec7f029 100644
--- a/src/backend/replication/logical/applyparallelworker.c
+++ b/src/backend/replication/logical/applyparallelworker.c
@@ -870,10 +870,17 @@ ParallelApplyWorkerMain(Datum main_arg)
InitializingApplyWorker = true;
- /* Setup signal handling. */
+ /*
+ * Setup signal handling.
+ *
+ * Note: We intentionally use SIGUSR2 to trigger a graceful shutdown
+ * initiated by the leader apply worker. This helps to differentiate it
+ * from the case where we abort the current transaction and exit on
+ * receiving SIGTERM.
+ */
pqsignal(SIGHUP, SignalHandlerForConfigReload);
- pqsignal(SIGINT, SignalHandlerForShutdownRequest);
pqsignal(SIGTERM, die);
+ pqsignal(SIGUSR2, SignalHandlerForShutdownRequest);
BackgroundWorkerUnblockSignals();
/*
@@ -972,9 +979,9 @@ ParallelApplyWorkerMain(Datum main_arg)
/*
* The parallel apply worker must not get here because the parallel apply
- * worker will only stop when it receives a SIGTERM or SIGINT from the
- * leader, or when there is an error. None of these cases will allow the
- * code to reach here.
+ * worker will only stop when it receives a SIGTERM or SIGUSR2 from the
+ * leader, or SIGINT from itself, or when there is an error. None of these
+ * cases will allow the code to reach here.
*/
Assert(false);
}
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index c900b6cf3b1..218cefe86e2 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -650,7 +650,7 @@ logicalrep_worker_stop(Oid subid, Oid relid)
/*
* Stop the given logical replication parallel apply worker.
*
- * Node that the function sends SIGINT instead of SIGTERM to the parallel apply
+ * Note that the function sends SIGUSR2 instead of SIGTERM to the parallel apply
* worker so that the worker exits cleanly.
*/
void
@@ -688,7 +688,7 @@ logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo)
* Only stop the worker if the generation matches and the worker is alive.
*/
if (worker->generation == generation && worker->proc)
- logicalrep_worker_stop_internal(worker, SIGINT);
+ logicalrep_worker_stop_internal(worker, SIGUSR2);
LWLockRelease(LogicalRepWorkerLock);
}