1 file changed, 63 insertions, 36 deletions
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index ba6c9b9183e..fe1ed795f91 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.550 2008/01/01 19:45:51 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.551 2008/01/11 00:54:09 tgl Exp $
  *
  * NOTES
  *
@@ -244,7 +244,7 @@ static bool FatalError = false; /* T if recovering from backend crash */
  * Notice that this state variable does not distinguish *why* we entered
  * PM_WAIT_BACKENDS or later states --- Shutdown and FatalError must be
  * consulted to find that out.	FatalError is never true in PM_RUN state, nor
- * in PM_SHUTDOWN state (because we don't enter that state when trying to
+ * in PM_SHUTDOWN or PM_SHUTDOWN_2 state (we never enter those while trying to
  * recover from a crash).  It can be true in PM_STARTUP state, because we
  * don't clear it until we've successfully recovered.
  */
@@ -255,6 +255,7 @@ typedef enum
 	PM_RUN,						/* normal "database is alive" state */
 	PM_WAIT_BACKENDS,			/* waiting for live backends to exit */
 	PM_SHUTDOWN,				/* waiting for bgwriter to do shutdown ckpt */
+	PM_SHUTDOWN_2,				/* waiting for archiver to finish */
 	PM_WAIT_DEAD_END,			/* waiting for dead_end children to exit */
 	PM_NO_CHILDREN				/* all important children have exited */
 } PMState;
@@ -1312,12 +1313,8 @@ ServerLoop(void)
 				start_autovac_launcher = false; /* signal processed */
 		}
 
-		/*
-		 * If we have lost the archiver, try to start a new one. We do this
-		 * even if we are shutting down, to allow archiver to take care of any
-		 * remaining WAL files.
-		 */
-		if (XLogArchivingActive() && PgArchPID == 0 && pmState >= PM_RUN)
+		/* If we have lost the archiver, try to start a new one */
+		if (XLogArchivingActive() && PgArchPID == 0 && pmState == PM_RUN)
 			PgArchPID = pgarch_start();
 
 		/* If we have lost the stats collector, try to start a new one */
@@ -2175,12 +2172,31 @@ reaper(SIGNAL_ARGS)
 				 * checkpoint.	(If for some reason it didn't, recovery will
 				 * occur on next postmaster start.)
 				 *
-				 * At this point we should have no normal children left (else
-				 * we'd not be in PM_SHUTDOWN state) but we might have
-				 * dead_end children.
+				 * At this point we should have no normal backend children
+				 * left (else we'd not be in PM_SHUTDOWN state) but we might
+				 * have dead_end children to wait for.
+				 *
+				 * If we have an archiver subprocess, tell it to do a last
+				 * archive cycle and quit; otherwise we can go directly to
+				 * PM_WAIT_DEAD_END state.
 				 */
 				Assert(Shutdown > NoShutdown);
-				pmState = PM_WAIT_DEAD_END;
+
+				if (PgArchPID != 0)
+				{
+					/* Waken archiver for the last time */
+					signal_child(PgArchPID, SIGUSR2);
+					pmState = PM_SHUTDOWN_2;
+				}
+				else
+					pmState = PM_WAIT_DEAD_END;
+
+				/*
+				 * We can also shut down the stats collector now; there's
+				 * nothing left for it to do.
+				 */
+				if (PgStatPID != 0)
+					signal_child(PgStatPID, SIGQUIT);
 			}
 			else
 			{
@@ -2227,7 +2243,8 @@ reaper(SIGNAL_ARGS)
 		/*
 		 * Was it the archiver?  If so, just try to start a new one; no need
 		 * to force reset of the rest of the system.  (If fail, we'll try
-		 * again in future cycles of the main loop.)
+		 * again in future cycles of the main loop.)  But if we were waiting
+		 * for it to shut down, advance to the next shutdown step.
 		 */
 		if (pid == PgArchPID)
 		{
@@ -2235,8 +2252,10 @@ reaper(SIGNAL_ARGS)
 			if (!EXIT_STATUS_0(exitstatus))
 				LogChildExit(LOG, _("archiver process"),
 							 pid, exitstatus);
-			if (XLogArchivingActive() && pmState >= PM_RUN)
+			if (XLogArchivingActive() && pmState == PM_RUN)
 				PgArchPID = pgarch_start();
+			else if (pmState == PM_SHUTDOWN_2)
+				pmState = PM_WAIT_DEAD_END;
 			continue;
 		}
 
@@ -2563,6 +2582,11 @@ PostmasterStateMachine(void)
 				 * change causes ServerLoop to stop creating new ones.
 				 */
 				pmState = PM_WAIT_DEAD_END;
+
+				/*
+				 * We already SIGQUIT'd the archiver and stats processes,
+				 * if any, when we entered FatalError state.
+				 */
 			}
 			else
 			{
@@ -2591,13 +2615,13 @@ PostmasterStateMachine(void)
 					 */
 					FatalError = true;
 					pmState = PM_WAIT_DEAD_END;
+
+					/* Kill the archiver and stats collector too */
+					if (PgArchPID != 0)
+						signal_child(PgArchPID, SIGQUIT);
+					if (PgStatPID != 0)
+						signal_child(PgStatPID, SIGQUIT);
 				}
-				/* Tell pgarch to shut down too; nothing left for it to do */
-				if (PgArchPID != 0)
-					signal_child(PgArchPID, SIGQUIT);
-				/* Tell pgstat to shut down too; nothing left for it to do */
-				if (PgStatPID != 0)
-					signal_child(PgStatPID, SIGQUIT);
 			}
 		}
 	}
@@ -2606,16 +2630,26 @@ PostmasterStateMachine(void)
 	{
 		/*
 		 * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
-		 * (ie, no dead_end children remain).
+		 * (ie, no dead_end children remain), and the archiver and stats
+		 * collector are gone too.
+		 *
+		 * The reason we wait for those two is to protect them against a new
+		 * postmaster starting conflicting subprocesses; this isn't an
+		 * ironclad protection, but it at least helps in the
+		 * shutdown-and-immediately-restart scenario.  Note that they have
+		 * already been sent appropriate shutdown signals, either during a
+		 * normal state transition leading up to PM_WAIT_DEAD_END, or during
+		 * FatalError processing.
 		 */
-		if (!DLGetHead(BackendList))
+		if (DLGetHead(BackendList) == NULL &&
+			PgArchPID == 0 && PgStatPID == 0)
 		{
 			/* These other guys should be dead already */
 			Assert(StartupPID == 0);
 			Assert(BgWriterPID == 0);
 			Assert(WalWriterPID == 0);
 			Assert(AutoVacPID == 0);
-			/* archiver, stats, and syslogger are not considered here */
+			/* syslogger is not considered here */
 			pmState = PM_NO_CHILDREN;
 		}
 	}
@@ -2628,14 +2662,9 @@ PostmasterStateMachine(void)
 	 * we got SIGTERM from init --- there may well not be time for recovery
 	 * before init decides to SIGKILL us.)
 	 *
-	 * Note: we do not wait around for exit of the archiver or stats
-	 * processes.  They've been sent SIGQUIT by this point (either when we
-	 * entered PM_SHUTDOWN state, or when we set FatalError, and at least one
-	 * of those must have happened by now).  In any case they contain logic to
-	 * commit hara-kiri if they notice the postmaster is gone.	Since they
-	 * aren't connected to shared memory, they pose no problem for shutdown.
-	 * The syslogger is not considered either, since it's intended to survive
-	 * till the postmaster exits.
+	 * Note that the syslogger continues to run.  It will exit when it sees
+	 * EOF on its input pipe, which happens when there are no more upstream
+	 * processes.
 	 */
 	if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
 	{
@@ -2652,10 +2681,8 @@ PostmasterStateMachine(void)
 	}
 
 	/*
-	 * If we need to recover from a crash, wait for all shmem-connected
-	 * children to exit, then reset shmem and StartupDataBase.	(We can ignore
-	 * the archiver and stats processes here since they are not connected to
-	 * shmem.)
+	 * If we need to recover from a crash, wait for all non-syslogger
+	 * children to exit, then reset shmem and StartupDataBase.
 	 */
 	if (FatalError && pmState == PM_NO_CHILDREN)
 	{
@@ -3782,7 +3809,7 @@ sigusr1_handler(SIGNAL_ARGS)
 	}
 
 	if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
-		PgArchPID != 0 && Shutdown <= SmartShutdown)
+		PgArchPID != 0)
 	{
 		/*
 		 * Send SIGUSR1 to archiver process, to wake it up and begin archiving