From 12915a58eec962f407a6c38ce2bf08a48dde57b5 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 25 Dec 2023 00:52:42 +0200 Subject: Enhance checkpointer restartpoint statistics Bhis commit introduces enhancements to the pg_stat_checkpointer view by adding three new columns: restartpoints_timed, restartpoints_req, and restartpoints_done. These additions aim to improve the visibility and monitoring of restartpoint processes on replicas. Previously, it was challenging to differentiate between successful and failed restartpoint requests. This limitation arises because restartpoints on replicas are dependent on checkpoint records from the primary, and cannot occur more frequently than these checkpoints. The new columns allow for clear distinction and tracking of restartpoint requests, their triggers, and successful completions. This enhancement aids database administrators and developers in better understanding and diagnosing issues related to restartpoint behavior, particularly in scenarios where restartpoint requests may fail. System catalog is changed. Catversion is bumped. Discussion: https://postgr.es/m/99b2ccd1-a77a-962a-0837-191cdf56c2b9%40inbox.ru Author: Anton A. Melnikov Reviewed-by: Kyotaro Horiguchi, Alexander Korotkov --- src/backend/postmaster/checkpointer.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'src/backend/postmaster/checkpointer.c') diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index dc2da5a2cd8..67ecb177e7e 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -340,6 +340,8 @@ CheckpointerMain(void) pg_time_t now; int elapsed_secs; int cur_timeout; + bool chkpt_or_rstpt_requested = false; + bool chkpt_or_rstpt_timed = false; /* Clear any already-pending wakeups */ ResetLatch(MyLatch); @@ -358,7 +360,7 @@ CheckpointerMain(void) if (((volatile CheckpointerShmemStruct *) CheckpointerShmem)->ckpt_flags) { do_checkpoint = true; - PendingCheckpointerStats.num_requested++; + chkpt_or_rstpt_requested = true; } /* @@ -372,7 +374,7 @@ CheckpointerMain(void) if (elapsed_secs >= CheckPointTimeout) { if (!do_checkpoint) - PendingCheckpointerStats.num_timed++; + chkpt_or_rstpt_timed = true; do_checkpoint = true; flags |= CHECKPOINT_CAUSE_TIME; } @@ -408,6 +410,24 @@ CheckpointerMain(void) if (flags & CHECKPOINT_END_OF_RECOVERY) do_restartpoint = false; + if (chkpt_or_rstpt_timed) + { + chkpt_or_rstpt_timed = false; + if (do_restartpoint) + PendingCheckpointerStats.restartpoints_timed++; + else + PendingCheckpointerStats.num_timed++; + } + + if (chkpt_or_rstpt_requested) + { + chkpt_or_rstpt_requested = false; + if (do_restartpoint) + PendingCheckpointerStats.restartpoints_requested++; + else + PendingCheckpointerStats.num_requested++; + } + /* * We will warn if (a) too soon since last checkpoint (whatever * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag @@ -471,6 +491,9 @@ CheckpointerMain(void) * checkpoints happen at a predictable spacing. */ last_checkpoint_time = now; + + if (do_restartpoint) + PendingCheckpointerStats.restartpoints_performed++; } else { -- cgit v1.2.3