diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/backend/postmaster/pgarch.c | 91 | ||||
| -rw-r--r-- | src/backend/postmaster/postmaster.c | 99 | 
2 files changed, 138 insertions, 52 deletions
| diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 6cb32fcb601..e181950c0fe 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -19,7 +19,7 @@   *   *   * IDENTIFICATION - *	  $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.37 2008/01/01 19:45:51 momjian Exp $ + *	  $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.38 2008/01/11 00:54:08 tgl Exp $   *   *-------------------------------------------------------------------------   */ @@ -77,12 +77,15 @@   * ----------   */  static time_t last_pgarch_start_time; +static time_t last_sigterm_time = 0;  /*   * Flags set by interrupt handlers for later service in the main loop.   */  static volatile sig_atomic_t got_SIGHUP = false; +static volatile sig_atomic_t got_SIGTERM = false;  static volatile sig_atomic_t wakened = false; +static volatile sig_atomic_t ready_to_stop = false;  /* ----------   * Local function forward declarations @@ -95,7 +98,9 @@ static pid_t pgarch_forkexec(void);  NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);  static void pgarch_exit(SIGNAL_ARGS);  static void ArchSigHupHandler(SIGNAL_ARGS); +static void ArchSigTermHandler(SIGNAL_ARGS);  static void pgarch_waken(SIGNAL_ARGS); +static void pgarch_waken_stop(SIGNAL_ARGS);  static void pgarch_MainLoop(void);  static void pgarch_ArchiverCopyLoop(void);  static bool pgarch_archiveXlog(char *xlog); @@ -236,16 +241,16 @@ PgArchiverMain(int argc, char *argv[])  	/*  	 * Ignore all signals usually bound to some action in the postmaster, -	 * except for SIGHUP, SIGUSR1 and SIGQUIT. +	 * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.  	 */  	pqsignal(SIGHUP, ArchSigHupHandler);  	pqsignal(SIGINT, SIG_IGN); -	pqsignal(SIGTERM, SIG_IGN); +	pqsignal(SIGTERM, ArchSigTermHandler);  	pqsignal(SIGQUIT, pgarch_exit);  	pqsignal(SIGALRM, SIG_IGN);  	pqsignal(SIGPIPE, SIG_IGN);  	pqsignal(SIGUSR1, pgarch_waken); -	pqsignal(SIGUSR2, SIG_IGN); +	pqsignal(SIGUSR2, pgarch_waken_stop);  	pqsignal(SIGCHLD, SIG_DFL);  	pqsignal(SIGTTIN, SIG_DFL);  	pqsignal(SIGTTOU, SIG_DFL); @@ -267,28 +272,47 @@ PgArchiverMain(int argc, char *argv[])  static void  pgarch_exit(SIGNAL_ARGS)  { -	/* -	 * For now, we just nail the doors shut and get out of town.  It might -	 * seem cleaner to finish up any pending archive copies, but there's a -	 * nontrivial risk that init will kill us partway through. -	 */ -	exit(0); +	/* SIGQUIT means curl up and die ... */ +	exit(1);  } -/* SIGHUP: set flag to re-read config file at next convenient time */ +/* SIGHUP signal handler for archiver process */  static void  ArchSigHupHandler(SIGNAL_ARGS)  { +	/* set flag to re-read config file at next convenient time */  	got_SIGHUP = true;  } +/* SIGTERM signal handler for archiver process */ +static void +ArchSigTermHandler(SIGNAL_ARGS) +{ +	/* +	 * The postmaster never sends us SIGTERM, so we assume that this means +	 * that init is trying to shut down the whole system.  If we hang around +	 * too long we'll get SIGKILL'd.  Set flag to prevent starting any more +	 * archive commands. +	 */ +	got_SIGTERM = true; +} +  /* SIGUSR1 signal handler for archiver process */  static void  pgarch_waken(SIGNAL_ARGS)  { +	/* set flag that there is work to be done */  	wakened = true;  } +/* SIGUSR2 signal handler for archiver process */ +static void +pgarch_waken_stop(SIGNAL_ARGS) +{ +	/* set flag to do a final cycle and shut down afterwards */ +	ready_to_stop = true; +} +  /*   * pgarch_MainLoop   * @@ -298,6 +322,7 @@ static void  pgarch_MainLoop(void)  {  	time_t		last_copy_time = 0; +	bool		time_to_stop;  	/*  	 * We run the copy loop immediately upon entry, in case there are @@ -309,6 +334,9 @@ pgarch_MainLoop(void)  	do  	{ +		/* When we get SIGUSR2, we do one more archive cycle, then exit */ +		time_to_stop = ready_to_stop; +  		/* Check for config update */  		if (got_SIGHUP)  		{ @@ -316,8 +344,26 @@ pgarch_MainLoop(void)  			ProcessConfigFile(PGC_SIGHUP);  		} +		/* +		 * If we've gotten SIGTERM, we normally just sit and do nothing until +		 * SIGUSR2 arrives.  However, that means a random SIGTERM would +		 * disable archiving indefinitely, which doesn't seem like a good +		 * idea.  If more than 60 seconds pass since SIGTERM, exit anyway, +		 * so that the postmaster can start a new archiver if needed. +		 */ +		if (got_SIGTERM) +		{ +			time_t		curtime = time(NULL); + +			if (last_sigterm_time == 0) +				last_sigterm_time = curtime; +			else if ((unsigned int) (curtime - last_sigterm_time) >= +					 (unsigned int) 60) +				break; +		} +  		/* Do what we're here for */ -		if (wakened) +		if (wakened || time_to_stop)  		{  			wakened = false;  			pgarch_ArchiverCopyLoop(); @@ -334,7 +380,8 @@ pgarch_MainLoop(void)  		 * sleep into 1-second increments, and check for interrupts after each  		 * nap.  		 */ -		while (!(wakened || got_SIGHUP)) +		while (!(wakened || ready_to_stop || got_SIGHUP || +				 !PostmasterIsAlive(true)))  		{  			time_t		curtime; @@ -344,7 +391,13 @@ pgarch_MainLoop(void)  				(unsigned int) PGARCH_AUTOWAKE_INTERVAL)  				wakened = true;  		} -	} while (PostmasterIsAlive(true)); + +		/* +		 * The archiver quits either when the postmaster dies (not expected) +		 * or after completing one more archiving cycle after receiving +		 * SIGUSR2. +		 */ +	} while (PostmasterIsAlive(true) && !time_to_stop);  }  /* @@ -377,8 +430,14 @@ pgarch_ArchiverCopyLoop(void)  		for (;;)  		{ -			/* Abandon processing if we notice our postmaster has died */ -			if (!PostmasterIsAlive(true)) +			/* +			 * Do not initiate any more archive commands after receiving +			 * SIGTERM, nor after the postmaster has died unexpectedly. +			 * The first condition is to try to keep from having init +			 * SIGKILL the command, and the second is to avoid conflicts +			 * with another archiver spawned by a newer postmaster. +			 */ +			if (got_SIGTERM || !PostmasterIsAlive(true))  				return;  			if (pgarch_archiveXlog(xlog)) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index ba6c9b9183e..fe1ed795f91 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@   *   *   * IDENTIFICATION - *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.550 2008/01/01 19:45:51 momjian Exp $ + *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.551 2008/01/11 00:54:09 tgl Exp $   *   * NOTES   * @@ -244,7 +244,7 @@ static bool FatalError = false; /* T if recovering from backend crash */   * Notice that this state variable does not distinguish *why* we entered   * PM_WAIT_BACKENDS or later states --- Shutdown and FatalError must be   * consulted to find that out.	FatalError is never true in PM_RUN state, nor - * in PM_SHUTDOWN state (because we don't enter that state when trying to + * in PM_SHUTDOWN states (because we don't enter those states when trying to   * recover from a crash).  It can be true in PM_STARTUP state, because we   * don't clear it until we've successfully recovered.   */ @@ -255,6 +255,7 @@ typedef enum  	PM_RUN,						/* normal "database is alive" state */  	PM_WAIT_BACKENDS,			/* waiting for live backends to exit */  	PM_SHUTDOWN,				/* waiting for bgwriter to do shutdown ckpt */ +	PM_SHUTDOWN_2,				/* waiting for archiver to finish */  	PM_WAIT_DEAD_END,			/* waiting for dead_end children to exit */  	PM_NO_CHILDREN				/* all important children have exited */  } PMState; @@ -1312,12 +1313,8 @@ ServerLoop(void)  				start_autovac_launcher = false; /* signal processed */  		} -		/* -		 * If we have lost the archiver, try to start a new one. We do this -		 * even if we are shutting down, to allow archiver to take care of any -		 * remaining WAL files. -		 */ -		if (XLogArchivingActive() && PgArchPID == 0 && pmState >= PM_RUN) +		/* If we have lost the archiver, try to start a new one */ +		if (XLogArchivingActive() && PgArchPID == 0 && pmState == PM_RUN)  			PgArchPID = pgarch_start();  		/* If we have lost the stats collector, try to start a new one */ @@ -2175,12 +2172,31 @@ reaper(SIGNAL_ARGS)  				 * checkpoint.	(If for some reason it didn't, recovery will  				 * occur on next postmaster start.)  				 * -				 * At this point we should have no normal children left (else -				 * we'd not be in PM_SHUTDOWN state) but we might have -				 * dead_end children. +				 * At this point we should have no normal backend children +				 * left (else we'd not be in PM_SHUTDOWN state) but we might +				 * have dead_end children to wait for. +				 * +				 * If we have an archiver subprocess, tell it to do a last +				 * archive cycle and quit; otherwise we can go directly to +				 * PM_WAIT_DEAD_END state.  				 */  				Assert(Shutdown > NoShutdown); -				pmState = PM_WAIT_DEAD_END; + +				if (PgArchPID != 0) +				{ +					/* Waken archiver for the last time */ +					signal_child(PgArchPID, SIGUSR2); +					pmState = PM_SHUTDOWN_2; +				} +				else +					pmState = PM_WAIT_DEAD_END; + +				/* +				 * We can also shut down the stats collector now; there's +				 * nothing left for it to do. +				 */ +				if (PgStatPID != 0) +					signal_child(PgStatPID, SIGQUIT);  			}  			else  			{ @@ -2227,7 +2243,8 @@ reaper(SIGNAL_ARGS)  		/*  		 * Was it the archiver?  If so, just try to start a new one; no need  		 * to force reset of the rest of the system.  (If fail, we'll try -		 * again in future cycles of the main loop.) +		 * again in future cycles of the main loop.)  But if we were waiting +		 * for it to shut down, advance to the next shutdown step.  		 */  		if (pid == PgArchPID)  		{ @@ -2235,8 +2252,10 @@ reaper(SIGNAL_ARGS)  			if (!EXIT_STATUS_0(exitstatus))  				LogChildExit(LOG, _("archiver process"),  							 pid, exitstatus); -			if (XLogArchivingActive() && pmState >= PM_RUN) +			if (XLogArchivingActive() && pmState == PM_RUN)  				PgArchPID = pgarch_start(); +			else if (pmState == PM_SHUTDOWN_2) +				pmState = PM_WAIT_DEAD_END;  			continue;  		} @@ -2563,6 +2582,11 @@ PostmasterStateMachine(void)  				 * change causes ServerLoop to stop creating new ones.  				 */  				pmState = PM_WAIT_DEAD_END; + +				/* +				 * We already SIGQUIT'd the archiver and stats processes, +				 * if any, when we entered FatalError state. +				 */  			}  			else  			{ @@ -2591,13 +2615,13 @@ PostmasterStateMachine(void)  					 */  					FatalError = true;  					pmState = PM_WAIT_DEAD_END; + +					/* Kill the archiver and stats collector too */ +					if (PgArchPID != 0) +						signal_child(PgArchPID, SIGQUIT); +					if (PgStatPID != 0) +						signal_child(PgStatPID, SIGQUIT);  				} -				/* Tell pgarch to shut down too; nothing left for it to do */ -				if (PgArchPID != 0) -					signal_child(PgArchPID, SIGQUIT); -				/* Tell pgstat to shut down too; nothing left for it to do */ -				if (PgStatPID != 0) -					signal_child(PgStatPID, SIGQUIT);  			}  		}  	} @@ -2606,16 +2630,26 @@ PostmasterStateMachine(void)  	{  		/*  		 * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty -		 * (ie, no dead_end children remain). +		 * (ie, no dead_end children remain), and the archiver and stats +		 * collector are gone too. +		 * +		 * The reason we wait for those two is to protect them against a new +		 * postmaster starting conflicting subprocesses; this isn't an +		 * ironclad protection, but it at least helps in the +		 * shutdown-and-immediately-restart scenario.  Note that they have +		 * already been sent appropriate shutdown signals, either during a +		 * normal state transition leading up to PM_WAIT_DEAD_END, or during +		 * FatalError processing.  		 */ -		if (!DLGetHead(BackendList)) +		if (DLGetHead(BackendList) == NULL && +			PgArchPID == 0 && PgStatPID == 0)  		{  			/* These other guys should be dead already */  			Assert(StartupPID == 0);  			Assert(BgWriterPID == 0);  			Assert(WalWriterPID == 0);  			Assert(AutoVacPID == 0); -			/* archiver, stats, and syslogger are not considered here */ +			/* syslogger is not considered here */  			pmState = PM_NO_CHILDREN;  		}  	} @@ -2628,14 +2662,9 @@ PostmasterStateMachine(void)  	 * we got SIGTERM from init --- there may well not be time for recovery  	 * before init decides to SIGKILL us.)  	 * -	 * Note: we do not wait around for exit of the archiver or stats -	 * processes.  They've been sent SIGQUIT by this point (either when we -	 * entered PM_SHUTDOWN state, or when we set FatalError, and at least one -	 * of those must have happened by now).  In any case they contain logic to -	 * commit hara-kiri if they notice the postmaster is gone.	Since they -	 * aren't connected to shared memory, they pose no problem for shutdown. -	 * The syslogger is not considered either, since it's intended to survive -	 * till the postmaster exits. +	 * Note that the syslogger continues to run.  It will exit when it sees +	 * EOF on its input pipe, which happens when there are no more upstream +	 * processes.  	 */  	if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)  	{ @@ -2652,10 +2681,8 @@ PostmasterStateMachine(void)  	}  	/* -	 * If we need to recover from a crash, wait for all shmem-connected -	 * children to exit, then reset shmem and StartupDataBase.	(We can ignore -	 * the archiver and stats processes here since they are not connected to -	 * shmem.) +	 * If we need to recover from a crash, wait for all non-syslogger +	 * children to exit, then reset shmem and StartupDataBase.  	 */  	if (FatalError && pmState == PM_NO_CHILDREN)  	{ @@ -3782,7 +3809,7 @@ sigusr1_handler(SIGNAL_ARGS)  	}  	if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) && -		PgArchPID != 0 && Shutdown <= SmartShutdown) +		PgArchPID != 0)  	{  		/*  		 * Send SIGUSR1 to archiver process, to wake it up and begin archiving | 
