From 9ce346eabf350a130bba46be3f8c50ba28506969 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 25 Oct 2021 11:51:57 -0400 Subject: Report progress of startup operations that take a long time. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users sometimes get concerned whe they start the server and it emits a few messages and then doesn't emit any more messages for a long time. Generally, what's happening is either that the system is taking a long time to apply WAL, or it's taking a long time to reset unlogged relations, or it's taking a long time to fsync the data directory, but it's not easy to tell which is the case. To fix that, add a new 'log_startup_progress_interval' setting, by default 10s. When an operation that is known to be potentially long-running takes more than this amount of time, we'll log a status update each time this interval elapses. To avoid undesirable log chatter, don't log anything about WAL replay when in standby mode. Nitin Jadhav and Robert Haas, reviewed by Amul Sul, Bharath Rupireddy, Justin Pryzby, Michael Paquier, and Álvaro Herrera. Discussion: https://postgr.es/m/CA+TgmoaHQrgDFOBwgY16XCoMtXxsrVGFB2jNCvb7-ubuEe1MGg@mail.gmail.com Discussion: https://postgr.es/m/CAMm1aWaHF7VE69572_OLQ+MgpT5RUiUDgF1x5RrtkJBLdpRj3Q@mail.gmail.com --- src/backend/access/transam/xlog.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/backend/access/transam/xlog.c') diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 142506834c4..cd553d6e12f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -79,6 +79,7 @@ #include "utils/relmapper.h" #include "utils/pg_rusage.h" #include "utils/snapmgr.h" +#include "utils/timeout.h" #include "utils/timestamp.h" extern uint32 bootstrap_data_checksum_version; @@ -6718,6 +6719,11 @@ StartupXLOG(void) */ ValidateXLOGDirectoryStructure(); + /* Set up timeout handler needed to report startup progress. */ + if (!IsBootstrapProcessingMode()) + RegisterTimeout(STARTUP_PROGRESS_TIMEOUT, + startup_progress_timeout_handler); + /*---------- * If we previously crashed, perform a couple of actions: * @@ -7491,6 +7497,10 @@ StartupXLOG(void) (errmsg("redo starts at %X/%X", LSN_FORMAT_ARGS(ReadRecPtr)))); + /* Prepare to report progress of the redo phase. */ + if (!StandbyMode) + begin_startup_progress_phase(); + /* * main redo apply loop */ @@ -7498,6 +7508,10 @@ StartupXLOG(void) { bool switchedTLI = false; + if (!StandbyMode) + ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X", + LSN_FORMAT_ARGS(ReadRecPtr)); + #ifdef WAL_DEBUG if (XLOG_DEBUG || (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) || -- cgit v1.2.3