diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2004-05-29 22:48:23 +0000 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2004-05-29 22:48:23 +0000 |
| commit | 076a055acf3c55314de267c62b03191586d79cf6 (patch) | |
| tree | 1bfb8a8755cd393c6615bd55a7a4bb7ad404781c /src/backend/postmaster/postmaster.c | |
| parent | d531fd2cdc819e7ca026c2ab9e65a7f7c49b1906 (diff) | |
Separate out bgwriter code into a logically separate module, rather
than being random pieces of other files. Give bgwriter responsibility
for all checkpoint activity (other than a post-recovery checkpoint);
so this child process absorbs the functionality of the former transient
checkpoint and shutdown subprocesses. While at it, create an actual
include file for postmaster.c, which for some reason never had its own
file before.
Diffstat (limited to 'src/backend/postmaster/postmaster.c')
| -rw-r--r-- | src/backend/postmaster/postmaster.c | 811 |
1 files changed, 329 insertions, 482 deletions
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 89a22815098..e7186c01b39 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,14 +37,13 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.399 2004/05/28 15:14:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.400 2004/05/29 22:48:19 tgl Exp $ * * NOTES * * Initialization: - * The Postmaster sets up a few shared memory data structures - * for the backends. It should at the very least initialize the - * lock manager. + * The Postmaster sets up shared memory data structures + * for the backends. * * Synchronization: * The Postmaster shares memory with the backends but should avoid @@ -97,6 +96,7 @@ #include "libpq/pqsignal.h" #include "miscadmin.h" #include "nodes/nodes.h" +#include "postmaster/postmaster.h" #include "pgtime.h" #include "storage/fd.h" #include "storage/ipc.h" @@ -113,22 +113,14 @@ #include "pgstat.h" -#ifdef HAVE_SIGPROCMASK -sigset_t UnBlockSig, - BlockSig, - AuthBlockSig; - -#else -int UnBlockSig, - BlockSig, - AuthBlockSig; -#endif - /* * List of active backends (or child processes anyway; we don't actually * know whether a given child has become a backend or is still in the * authorization phase). This is used mainly to keep track of how many * children we have and send them appropriate signals when necessary. + * + * "Special" children such as the startup and bgwriter tasks are not in + * this list. */ typedef struct bkend { @@ -149,15 +141,6 @@ char *UnixSocketDir; char *ListenAddresses; /* - * MaxBackends is the limit on the number of backends we can start. - * Note that a larger MaxBackends value will increase the size of the - * shared memory area as well as cause the postmaster to grab more - * kernel semaphores, even if you never actually use that many - * backends. - */ -int MaxBackends; - -/* * ReservedBackends is the number of backends reserved for superuser use. * This number is taken out of the pool size given by MaxBackends so * number of backend slots available to non-superusers is @@ -196,9 +179,6 @@ bool SilentMode = false; /* silent mode (-S) */ int PreAuthDelay = 0; int AuthenticationTimeout = 60; -int CheckPointTimeout = 300; -int CheckPointWarning = 30; -time_t LastSignalledCheckpoint = 0; bool log_hostname; /* for ps display and logging */ bool Log_connections = false; @@ -209,13 +189,11 @@ char *rendezvous_name; /* list of library:init-function to be preloaded */ char *preload_libraries_string = NULL; -/* Startup/shutdown state */ +/* PIDs of special child processes; 0 when not running */ static pid_t StartupPID = 0, - ShutdownPID = 0, - CheckPointPID = 0, BgWriterPID = 0; -static time_t checkpointed = 0; +/* Startup/shutdown state */ #define NoShutdown 0 #define SmartShutdown 1 #define FastShutdown 2 @@ -232,7 +210,6 @@ bool ClientAuthInProgress = false; /* T during new-client * Also, the global MyCancelKey passes the cancel key assigned to a given * backend from the postmaster to that backend (via fork). */ - static unsigned int random_seed = 0; static int debug_flag = 0; @@ -263,6 +240,7 @@ static void reaper(SIGNAL_ARGS); static void sigusr1_handler(SIGNAL_ARGS); static void dummy_handler(SIGNAL_ARGS); static void CleanupProc(int pid, int exitstatus); +static void HandleChildCrash(int pid, int exitstatus); static void LogChildExit(int lev, const char *procname, int pid, int exitstatus); static int BackendRun(Port *port); @@ -280,7 +258,7 @@ static void RandomSalt(char *cryptSalt, char *md5Salt); static void SignalChildren(int signal); static int CountChildren(void); static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); -static pid_t SSDataBase(int xlop); +static pid_t StartChildProcess(int xlop); static void postmaster_error(const char *fmt,...) /* This lets gcc check the format string for consistency. */ @@ -311,10 +289,8 @@ static void ShmemBackendArrayRemove(pid_t pid); #endif /* EXEC_BACKEND */ -#define StartupDataBase() SSDataBase(BS_XLOG_STARTUP) -#define CheckPointDataBase() SSDataBase(BS_XLOG_CHECKPOINT) -#define StartBackgroundWriter() SSDataBase(BS_XLOG_BGWRITER) -#define ShutdownDataBase() SSDataBase(BS_XLOG_SHUTDOWN) +#define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP) +#define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER) /* @@ -325,14 +301,13 @@ PostmasterMain(int argc, char *argv[]) { int opt; int status; - char original_extraoptions[MAXPGPATH]; char *potential_DataDir = NULL; int i; - *original_extraoptions = '\0'; - progname = get_progname(argv[0]); + MyProcPid = PostmasterPid = getpid(); + IsPostmasterEnvironment = true; /* @@ -359,8 +334,6 @@ PostmasterMain(int argc, char *argv[]) */ umask((mode_t) 0077); - MyProcPid = PostmasterPid = getpid(); - /* * Fire up essential subsystems: memory management */ @@ -470,12 +443,10 @@ PostmasterMain(int argc, char *argv[]) case 'o': /* - * Other options to pass to the backend on the command - * line -- useful only for debugging. + * Other options to pass to the backend on the command line */ strcat(ExtraOptions, " "); strcat(ExtraOptions, optarg); - strcpy(original_extraoptions, optarg); break; case 'p': SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV); @@ -656,7 +627,7 @@ PostmasterMain(int argc, char *argv[]) * We want to do this before we try to grab the input sockets, because * the data directory interlock is more reliable than the socket-file * interlock (thanks to whoever decided to put socket files in /tmp - * :-(). For the same reason, it's best to grab the TCP socket before + * :-(). For the same reason, it's best to grab the TCP socket(s) before * the Unix socket. */ CreateDataDirLockFile(DataDir, true); @@ -766,12 +737,13 @@ PostmasterMain(int argc, char *argv[]) BackendList = DLNewList(); #ifdef WIN32 - /* - * Initialize the child pid/HANDLE arrays + * Initialize the child pid/HANDLE arrays for signal handling. */ - win32_childPIDArray = (pid_t *) malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t)); - win32_childHNDArray = (HANDLE *) malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE)); + win32_childPIDArray = (pid_t *) + malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t)); + win32_childHNDArray = (HANDLE *) + malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE)); if (!win32_childPIDArray || !win32_childHNDArray) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), @@ -791,16 +763,16 @@ PostmasterMain(int argc, char *argv[]) * * CAUTION: when changing this list, check for side-effects on the signal * handling setup of child processes. See tcop/postgres.c, - * bootstrap/bootstrap.c, and postmaster/pgstat.c. + * bootstrap/bootstrap.c, postmaster/bgwriter.c, and postmaster/pgstat.c. */ pqinitmask(); PG_SETMASK(&BlockSig); pqsignal(SIGHUP, SIGHUP_handler); /* reread config file and have * children do same */ - pqsignal(SIGINT, pmdie); /* send SIGTERM and ShutdownDataBase */ + pqsignal(SIGINT, pmdie); /* send SIGTERM and shut down */ pqsignal(SIGQUIT, pmdie); /* send SIGQUIT and die */ - pqsignal(SIGTERM, pmdie); /* wait for children and ShutdownDataBase */ + pqsignal(SIGTERM, pmdie); /* wait for children and shut down */ pqsignal(SIGALRM, SIG_IGN); /* ignored */ pqsignal(SIGPIPE, SIG_IGN); /* ignored */ pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */ @@ -823,19 +795,6 @@ PostmasterMain(int argc, char *argv[]) whereToSendOutput = None; /* - * On many platforms, the first call of localtime() incurs significant - * overhead to load timezone info from the system configuration files. - * By doing it once in the postmaster, we avoid having to do it in - * every started child process. The savings are not huge, but they - * add up... - */ - { - time_t now = time(NULL); - - (void) pg_localtime(&now); - } - - /* * Initialize and try to startup the statistics collector process */ pgstat_init(); @@ -956,10 +915,7 @@ reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context) static void pmdaemonize(void) { -#ifdef WIN32 - /* not supported */ - elog(FATAL, "SilentMode not supported under WIN32"); -#else +#ifndef WIN32 int i; pid_t pid; @@ -989,7 +945,7 @@ pmdaemonize(void) setitimer(ITIMER_PROF, &prof_itimer, NULL); #endif - MyProcPid = getpid(); /* reset MyProcPid to child */ + MyProcPid = PostmasterPid = getpid(); /* reset PID vars to child */ /* GH: If there's no setsid(), we hopefully don't need silent mode. * Until there's a better solution. @@ -1007,7 +963,10 @@ pmdaemonize(void) dup2(i, 1); dup2(i, 2); close(i); -#endif +#else /* WIN32 */ + /* not supported */ + elog(FATAL, "SilentMode not supported under WIN32"); +#endif /* WIN32 */ } @@ -1053,19 +1012,21 @@ usage(const char *progname) /* - * Main loop of postmaster + * Main idle loop of postmaster */ static int ServerLoop(void) { fd_set readmask; int nSockets; - struct timeval now, + time_t now, + last_touch_time; + struct timeval earlier, later; struct timezone tz; - int i; - gettimeofday(&now, &tz); + gettimeofday(&earlier, &tz); + last_touch_time = time(NULL); nSockets = initMasks(&readmask); @@ -1074,70 +1035,32 @@ ServerLoop(void) Port *port; fd_set rmask; struct timeval timeout; + int selres; + int i; /* - * The timeout for the select() below is normally set on the basis - * of the time to the next checkpoint. However, if for some - * reason we don't have a next-checkpoint time, time out after 60 - * seconds. This keeps checkpoint scheduling from locking up when - * we get new connection requests infrequently (since we are - * likely to detect checkpoint completion just after enabling - * signals below, after we've already made the decision about how - * long to wait this time). + * Wait for something to happen. + * + * We wait at most one minute, to ensure that the other background + * tasks handled below get done even when no requests are arriving. */ + memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set)); + timeout.tv_sec = 60; timeout.tv_usec = 0; - if (CheckPointPID == 0 && checkpointed && - StartupPID == 0 && Shutdown == NoShutdown && - !FatalError && random_seed != 0) - { - time_t now = time(NULL); - - if (CheckPointTimeout + checkpointed > now) - { - /* - * Not time for checkpoint yet, so set select timeout - */ - timeout.tv_sec = CheckPointTimeout + checkpointed - now; - } - else - { - /* Time to make the checkpoint... */ - CheckPointPID = CheckPointDataBase(); - - /* - * if fork failed, schedule another try at 0.1 normal - * delay - */ - if (CheckPointPID == 0) - { - timeout.tv_sec = CheckPointTimeout / 10; - checkpointed = now + timeout.tv_sec - CheckPointTimeout; - } - } - } + PG_SETMASK(&UnBlockSig); - /* - * If no background writer process is running and we should do - * background writing, start one. It doesn't matter if this fails, - * we'll just try again later. - */ - if (BgWriterPID == 0 && BgWriterPercent > 0 && - StartupPID == 0 && Shutdown == NoShutdown && - !FatalError && random_seed != 0) - BgWriterPID = StartBackgroundWriter(); + selres = select(nSockets, &rmask, NULL, NULL, &timeout); /* - * Wait for something to happen. + * Block all signals until we wait again. (This makes it safe for + * our signal handlers to do nontrivial work.) */ - memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set)); - - PG_SETMASK(&UnBlockSig); + PG_SETMASK(&BlockSig); - if (select(nSockets, &rmask, NULL, NULL, &timeout) < 0) + if (selres < 0) { - PG_SETMASK(&BlockSig); if (errno == EINTR || errno == EWOULDBLOCK) continue; ereport(LOG, @@ -1147,63 +1070,85 @@ ServerLoop(void) } /* - * Block all signals until we wait again. (This makes it safe for - * our signal handlers to do nontrivial work.) - */ - PG_SETMASK(&BlockSig); - - /* - * Select a random seed at the time of first receiving a request. + * New connection pending on any of our sockets? If so, fork a + * child process to deal with it. */ - while (random_seed == 0) + if (selres > 0) { - gettimeofday(&later, &tz); - /* - * We are not sure how much precision is in tv_usec, so we - * swap the nibbles of 'later' and XOR them with 'now'. On the - * off chance that the result is 0, we loop until it isn't. + * Select a random seed at the time of first receiving a request. */ - random_seed = now.tv_usec ^ - ((later.tv_usec << 16) | - ((later.tv_usec >> 16) & 0xffff)); - } + while (random_seed == 0) + { + gettimeofday(&later, &tz); - /* - * New connection pending on any of our sockets? If so, fork a - * child process to deal with it. - */ - for (i = 0; i < MAXLISTEN; i++) - { - if (ListenSocket[i] == -1) - break; - if (FD_ISSET(ListenSocket[i], &rmask)) + /* + * We are not sure how much precision is in tv_usec, so we + * swap the nibbles of 'later' and XOR them with 'earlier'. On + * the off chance that the result is 0, we loop until it isn't. + */ + random_seed = earlier.tv_usec ^ + ((later.tv_usec << 16) | + ((later.tv_usec >> 16) & 0xffff)); + } + + for (i = 0; i < MAXLISTEN; i++) { - port = ConnCreate(ListenSocket[i]); - if (port) + if (ListenSocket[i] == -1) + break; + if (FD_ISSET(ListenSocket[i], &rmask)) { - BackendStartup(port); - - /* - * We no longer need the open socket or port structure - * in this process - */ - StreamClose(port->sock); - ConnFree(port); + port = ConnCreate(ListenSocket[i]); + if (port) + { + BackendStartup(port); + + /* + * We no longer need the open socket or port structure + * in this process + */ + StreamClose(port->sock); + ConnFree(port); + } } } } + /* + * If no background writer process is running, and we are not in + * a state that prevents it, start one. It doesn't matter if this + * fails, we'll just try again later. + */ + if (BgWriterPID == 0 && StartupPID == 0 && !FatalError) + { + BgWriterPID = StartBackgroundWriter(); + /* If shutdown is pending, set it going */ + if (Shutdown > NoShutdown && BgWriterPID != 0) + kill(BgWriterPID, SIGUSR2); + } + /* If we have lost the stats collector, try to start a new one */ if (!pgstat_is_running) pgstat_start(); + + /* + * Touch the socket and lock file at least every ten minutes, to ensure + * that they are not removed by overzealous /tmp-cleaning tasks. + */ + now = time(NULL); + if (now - last_touch_time >= 10 * 60) + { + TouchSocketFile(); + TouchSocketLockFile(); + last_touch_time = now; + } } } /* - * Initialise the masks for select() for the ports - * we are listening on. Return the number of sockets to listen on. + * Initialise the masks for select() for the ports we are listening on. + * Return the number of sockets to listen on. */ static int initMasks(fd_set *rmask) @@ -1543,14 +1488,7 @@ processCancelRequest(Port *port, void *pkt) backendPID = (int) ntohl(canc->backendPID); cancelAuthCode = (long) ntohl(canc->cancelAuthCode); - if (backendPID == CheckPointPID) - { - ereport(DEBUG2, - (errmsg_internal("ignoring cancel request for checkpoint process %d", - backendPID))); - return; - } - else if (backendPID == BgWriterPID) + if (backendPID == BgWriterPID) { ereport(DEBUG2, (errmsg_internal("ignoring cancel request for bgwriter process %d", @@ -1561,7 +1499,7 @@ processCancelRequest(Port *port, void *pkt) /* * See if we have a matching backend. In the EXEC_BACKEND case, we * can no longer access the postmaster's own backend list, and must - * rely on the backup array in shared memory. + * rely on the duplicate array in shared memory. */ #ifndef EXEC_BACKEND for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr)) @@ -1687,7 +1625,7 @@ ConnFree(Port *conn) * them open, of course. */ void -ClosePostmasterPorts(bool pgstat_too) +ClosePostmasterPorts(void) { int i; @@ -1700,10 +1638,6 @@ ClosePostmasterPorts(bool pgstat_too) ListenSocket[i] = -1; } } - - /* Close pgstat control sockets, unless we're starting pgstat itself */ - if (pgstat_too) - pgstat_close_sockets(); } @@ -1741,6 +1675,8 @@ SIGHUP_handler(SIGNAL_ARGS) (errmsg("received SIGHUP, reloading configuration files"))); ProcessConfigFile(PGC_SIGHUP); SignalChildren(SIGHUP); + if (BgWriterPID != 0) + kill(BgWriterPID, SIGHUP); load_hba(); load_ident(); @@ -1748,13 +1684,6 @@ SIGHUP_handler(SIGNAL_ARGS) /* Update the starting-point file for future children */ write_nondefault_variables(PGC_SIGHUP); #endif - - /* - * Tell the background writer to terminate so that we will start a - * new one with a possibly changed config - */ - if (BgWriterPID != 0) - kill(BgWriterPID, SIGTERM); } PG_SETMASK(&UnBlockSig); @@ -1780,11 +1709,10 @@ pmdie(SIGNAL_ARGS) switch (postgres_signal_arg) { case SIGTERM: - /* * Smart Shutdown: * - * Wait for children to end their work and ShutdownDataBase. + * Wait for children to end their work, then shut down. */ if (Shutdown >= SmartShutdown) break; @@ -1792,36 +1720,28 @@ pmdie(SIGNAL_ARGS) ereport(LOG, (errmsg("received smart shutdown request"))); - /* Must tell bgwriter to quit, or it never will... */ - if (BgWriterPID != 0) - kill(BgWriterPID, SIGTERM); - - if (DLGetHead(BackendList)) /* let reaper() handle this */ - break; + if (DLGetHead(BackendList)) + break; /* let reaper() handle this */ /* - * No children left. Shutdown data base system. + * No children left. Begin shutdown of data base system. */ - if (StartupPID > 0 || FatalError) /* let reaper() handle - * this */ - break; - if (ShutdownPID > 0) - { - elog(PANIC, "shutdown process %d already running", - (int) ShutdownPID); - abort(); - } - - ShutdownPID = ShutdownDataBase(); + if (StartupPID != 0 || FatalError) + break; /* let reaper() handle this */ + /* Start the bgwriter if not running */ + if (BgWriterPID == 0) + BgWriterPID = StartBackgroundWriter(); + /* And tell it to shut down */ + if (BgWriterPID != 0) + kill(BgWriterPID, SIGUSR2); break; case SIGINT: - /* * Fast Shutdown: * * Abort all children with SIGTERM (rollback active transactions - * and exit) and ShutdownDataBase when they are gone. + * and exit) and shut down when they are gone. */ if (Shutdown >= FastShutdown) break; @@ -1842,33 +1762,34 @@ pmdie(SIGNAL_ARGS) } /* - * No children left. Shutdown data base system. + * No children left. Begin shutdown of data base system. * - * Unlike the previous case, it is not an error for the shutdown - * process to be running already (we could get SIGTERM - * followed shortly later by SIGINT). + * Note: if we previously got SIGTERM then we may send SIGUSR2 + * to the bgwriter a second time here. This should be harmless. */ - if (StartupPID > 0 || FatalError) /* let reaper() handle - * this */ - break; - if (ShutdownPID == 0) - ShutdownPID = ShutdownDataBase(); + if (StartupPID != 0 || FatalError) + break; /* let reaper() handle this */ + /* Start the bgwriter if not running */ + if (BgWriterPID == 0) + BgWriterPID = StartBackgroundWriter(); + /* And tell it to shut down */ + if (BgWriterPID != 0) + kill(BgWriterPID, SIGUSR2); break; case SIGQUIT: - /* * Immediate Shutdown: * * abort all children with SIGQUIT and exit without attempt to - * properly shutdown data base system. + * properly shut down data base system. */ ereport(LOG, (errmsg("received immediate shutdown request"))); - if (ShutdownPID > 0) - kill(ShutdownPID, SIGQUIT); - if (StartupPID > 0) + if (StartupPID != 0) kill(StartupPID, SIGQUIT); + if (BgWriterPID != 0) + kill(BgWriterPID, SIGQUIT); if (DLGetHead(BackendList)) SignalChildren(SIGQUIT); ExitPostmaster(0); @@ -1939,22 +1860,11 @@ reaper(SIGNAL_ARGS) } /* - * Check if this child was a shutdown or startup process. + * Check if this child was a startup process. */ - if (ShutdownPID > 0 && pid == ShutdownPID) - { - if (exitstatus != 0) - { - LogChildExit(LOG, gettext("shutdown process"), - pid, exitstatus); - ExitPostmaster(1); - } - /* Normal postmaster exit is here */ - ExitPostmaster(0); - } - - if (StartupPID > 0 && pid == StartupPID) + if (StartupPID != 0 && pid == StartupPID) { + StartupPID = 0; if (exitstatus != 0) { LogChildExit(LOG, gettext("startup process"), @@ -1963,7 +1873,6 @@ reaper(SIGNAL_ARGS) (errmsg("aborting startup due to startup process failure"))); ExitPostmaster(1); } - StartupPID = 0; /* * Startup succeeded - we are done with system startup or recovery. @@ -1971,33 +1880,51 @@ reaper(SIGNAL_ARGS) FatalError = false; /* - * Arrange for first checkpoint to occur after standard delay. + * Crank up the background writer. It doesn't matter if this + * fails, we'll just try again later. */ - CheckPointPID = 0; - checkpointed = time(NULL); + Assert(BgWriterPID == 0); + BgWriterPID = StartBackgroundWriter(); /* * Go to shutdown mode if a shutdown request was pending. */ - if (Shutdown > NoShutdown) + if (Shutdown > NoShutdown && BgWriterPID != 0) + kill(BgWriterPID, SIGUSR2); + + continue; + } + + /* + * Was it the bgwriter? + */ + if (BgWriterPID != 0 && pid == BgWriterPID) + { + if (exitstatus == 0 && Shutdown > NoShutdown && + !FatalError && !DLGetHead(BackendList)) { - if (ShutdownPID > 0) - { - elog(PANIC, "startup process %d died while shutdown process %d already running", - pid, (int) ShutdownPID); - abort(); - } - ShutdownPID = ShutdownDataBase(); + /* + * Normal postmaster exit is here: we've seen normal + * exit of the bgwriter after it's been told to shut down. + * We expect that it wrote a shutdown checkpoint. (If + * for some reason it didn't, recovery will occur on next + * postmaster start.) + */ + ExitPostmaster(0); } - - goto reaper_done; + /* + * Any unexpected exit of the bgwriter is treated as a crash. + */ + LogChildExit(DEBUG2, gettext("background writer process"), + pid, exitstatus); + HandleChildCrash(pid, exitstatus); + continue; } /* - * Else do standard child cleanup. + * Else do standard backend child cleanup. */ CleanupProc(pid, exitstatus); - } /* loop over pending child-death reports */ if (FatalError) @@ -2006,7 +1933,7 @@ reaper(SIGNAL_ARGS) * Wait for all children exit, then reset shmem and * StartupDataBase. */ - if (DLGetHead(BackendList) || StartupPID > 0 || ShutdownPID > 0) + if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0) goto reaper_done; ereport(LOG, (errmsg("all server processes terminated; reinitializing"))); @@ -2021,11 +1948,14 @@ reaper(SIGNAL_ARGS) if (Shutdown > NoShutdown) { - if (DLGetHead(BackendList)) - goto reaper_done; - if (StartupPID > 0 || ShutdownPID > 0) + if (DLGetHead(BackendList) || StartupPID != 0) goto reaper_done; - ShutdownPID = ShutdownDataBase(); + /* Start the bgwriter if not running */ + if (BgWriterPID == 0) + BgWriterPID = StartBackgroundWriter(); + /* And tell it to shut down */ + if (BgWriterPID != 0) + kill(BgWriterPID, SIGUSR2); } reaper_done: @@ -2044,11 +1974,9 @@ static void CleanupProc(int pid, int exitstatus) /* child's exit status. */ { - Dlelem *curr, - *next; - Backend *bp; + Dlelem *curr; - LogChildExit(DEBUG2, gettext("child process"), pid, exitstatus); + LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus); /* * If a backend dies in an ugly way (i.e. exit status not 0) then we @@ -2056,61 +1984,81 @@ CleanupProc(int pid, * we assume everything is hunky dory and simply remove the backend * from the active backend list. */ - if (exitstatus == 0) + if (exitstatus != 0) + { + HandleChildCrash(pid, exitstatus); + return; + } + + for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr)) { - curr = DLGetHead(BackendList); - while (curr) + Backend *bp = (Backend *) DLE_VAL(curr); + + if (bp->pid == pid) { - bp = (Backend *) DLE_VAL(curr); - if (bp->pid == pid) - { + DLRemove(curr); + free(bp); + DLFreeElem(curr); #ifdef EXEC_BACKEND - ShmemBackendArrayRemove(bp->pid); + ShmemBackendArrayRemove(pid); #endif - DLRemove(curr); - free(bp); - DLFreeElem(curr); - break; - } - curr = DLGetSucc(curr); - } - - if (pid == CheckPointPID) - { - CheckPointPID = 0; - if (!FatalError) - { - checkpointed = time(NULL); - } - } - else if (pid == BgWriterPID) - BgWriterPID = 0; - else + /* Tell the collector about backend termination */ pgstat_beterm(pid); - - return; + break; + } } +} - /* below here we're dealing with a non-normal exit */ +/* + * HandleChildCrash -- cleanup after failed backend or bgwriter. + * + * The objectives here are to clean up our local state about the child + * process, and to signal all other remaining children to quickdie. + */ +static void +HandleChildCrash(int pid, + int exitstatus) /* child's exit status. */ +{ + Dlelem *curr, + *next; + Backend *bp; - /* Make log entry unless we did so already */ + /* + * Make log entry unless there was a previous crash (if so, nonzero + * exit status is to be expected in SIGQUIT response; don't clutter log) + */ if (!FatalError) { LogChildExit(LOG, - (pid == CheckPointPID) ? gettext("checkpoint process") : - (pid == BgWriterPID) ? gettext("bgwriter process") : + (pid == BgWriterPID) ? + gettext("background writer process") : gettext("server process"), pid, exitstatus); ereport(LOG, - (errmsg("terminating any other active server processes"))); + (errmsg("terminating any other active server processes"))); } - curr = DLGetHead(BackendList); - while (curr) + /* Process regular backends */ + for (curr = DLGetHead(BackendList); curr; curr = next) { next = DLGetSucc(curr); bp = (Backend *) DLE_VAL(curr); - if (bp->pid != pid) + if (bp->pid == pid) + { + /* + * Found entry for freshly-dead backend, so remove it. + */ + DLRemove(curr); + free(bp); + DLFreeElem(curr); +#ifdef EXEC_BACKEND + ShmemBackendArrayRemove(pid); +#endif + /* Tell the collector about backend termination */ + pgstat_beterm(pid); + /* Keep looping so we can signal remaining backends */ + } + else { /* * This backend is still alive. Unless we did so already, @@ -2130,34 +2078,18 @@ CleanupProc(int pid, kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT)); } } - else - { - /* - * Found entry for freshly-dead backend, so remove it. - */ -#ifdef EXEC_BACKEND - ShmemBackendArrayRemove(bp->pid); -#endif - DLRemove(curr); - free(bp); - DLFreeElem(curr); - } - curr = next; } - if (pid == CheckPointPID) - { - CheckPointPID = 0; - checkpointed = 0; - } - else if (pid == BgWriterPID) + /* Take care of the bgwriter too */ + if (pid == BgWriterPID) BgWriterPID = 0; - else + else if (BgWriterPID != 0 && !FatalError) { - /* - * Tell the collector about backend termination - */ - pgstat_beterm(pid); + ereport(DEBUG2, + (errmsg_internal("sending %s to process %d", + (SendStop ? "SIGSTOP" : "SIGQUIT"), + (int) BgWriterPID))); + kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT)); } FatalError = true; @@ -2204,26 +2136,16 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) static void SignalChildren(int signal) { - Dlelem *curr, - *next; - Backend *bp; + Dlelem *curr; - curr = DLGetHead(BackendList); - while (curr) + for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr)) { - next = DLGetSucc(curr); - bp = (Backend *) DLE_VAL(curr); + Backend *bp = (Backend *) DLE_VAL(curr); - if (bp->pid != MyProcPid) - { - ereport(DEBUG2, - (errmsg_internal("sending signal %d to process %d", - signal, - (int) bp->pid))); - kill(bp->pid, signal); - } - - curr = next; + ereport(DEBUG4, + (errmsg_internal("sending signal %d to process %d", + signal, (int) bp->pid))); + kill(bp->pid, signal); } } @@ -2346,10 +2268,10 @@ BackendStartup(Port *port) */ bn->pid = pid; bn->cancel_key = MyCancelKey; + DLAddHead(BackendList, DLNewElem(bn)); #ifdef EXEC_BACKEND ShmemBackendArrayAdd(bn); #endif - DLAddHead(BackendList, DLNewElem(bn)); return STATUS_OK; } @@ -2438,9 +2360,9 @@ BackendRun(Port *port) /* * Let's clean up ourselves as the postmaster child, and close the - * postmaster's other sockets + * postmaster's listen sockets */ - ClosePostmasterPorts(true); + ClosePostmasterPorts(); /* We don't want the postmaster's proc_exit() handlers */ on_exit_reset(); @@ -2833,23 +2755,23 @@ SubPostmasterMain(int argc, char *argv[]) if (strcmp(argv[1], "-forkboot") == 0) { /* Close the postmaster's sockets */ - ClosePostmasterPorts(true); + ClosePostmasterPorts(); /* Attach process to shared segments */ CreateSharedMemoryAndSemaphores(false, MaxBackends, 0); BootstrapMain(argc - 2, argv + 2); - ExitPostmaster(0); + proc_exit(0); } if (strcmp(argv[1], "-forkbuf") == 0) { /* Close the postmaster's sockets */ - ClosePostmasterPorts(false); + ClosePostmasterPorts(); /* Do not want to attach to shared memory */ PgstatBufferMain(argc, argv); - ExitPostmaster(0); + proc_exit(0); } if (strcmp(argv[1], "-forkcol") == 0) { @@ -2861,7 +2783,7 @@ SubPostmasterMain(int argc, char *argv[]) /* Do not want to attach to shared memory */ PgstatCollectorMain(argc, argv); - ExitPostmaster(0); + proc_exit(0); } return 1; /* shouldn't get here */ @@ -2886,8 +2808,6 @@ ExitPostmaster(int status) * * MUST -- vadim 05-10-1999 */ - /* Should I use true instead? */ - ClosePostmasterPorts(false); proc_exit(status); } @@ -2902,45 +2822,6 @@ sigusr1_handler(SIGNAL_ARGS) PG_SETMASK(&BlockSig); - if (CheckPostmasterSignal(PMSIGNAL_DO_CHECKPOINT)) - { - if (CheckPointWarning != 0) - { - /* - * This only times checkpoints forced by running out of - * segment files. Other checkpoints could reduce the - * frequency of forced checkpoints. - */ - time_t now = time(NULL); - - if (LastSignalledCheckpoint != 0) - { - int elapsed_secs = now - LastSignalledCheckpoint; - - if (elapsed_secs < CheckPointWarning) - ereport(LOG, - (errmsg("checkpoints are occurring too frequently (%d seconds apart)", - elapsed_secs), - errhint("Consider increasing the configuration parameter \"checkpoint_segments\"."))); - } - LastSignalledCheckpoint = now; - } - - /* - * Request to schedule a checkpoint - * - * Ignore request if checkpoint is already running or checkpointing - * is currently disabled - */ - if (CheckPointPID == 0 && checkpointed && - StartupPID == 0 && Shutdown == NoShutdown && - !FatalError && random_seed != 0) - { - CheckPointPID = CheckPointDataBase(); - /* note: if fork fails, CheckPointPID stays 0; nothing happens */ - } - } - if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE)) { /* @@ -2957,7 +2838,7 @@ sigusr1_handler(SIGNAL_ARGS) * CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the * use of this. */ - if (Shutdown == NoShutdown) + if (Shutdown <= SmartShutdown) SignalChildren(SIGUSR1); } @@ -2971,9 +2852,10 @@ sigusr1_handler(SIGNAL_ARGS) * Dummy signal handler * * We use this for signals that we don't actually use in the postmaster, - * but we do use in backends. If we SIG_IGN such signals in the postmaster, - * then a newly started backend might drop a signal that arrives before it's - * able to reconfigure its signal processing. (See notes in postgres.c.) + * but we do use in backends. If we were to SIG_IGN such signals in the + * postmaster, then a newly started backend might drop a signal that arrives + * before it's able to reconfigure its signal processing. (See notes in + * tcop/postgres.c.) */ static void dummy_handler(SIGNAL_ARGS) @@ -3057,37 +2939,28 @@ static int CountChildren(void) { Dlelem *curr; - Backend *bp; int cnt = 0; for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr)) { - bp = (Backend *) DLE_VAL(curr); - if (bp->pid != MyProcPid) - cnt++; - } - /* Checkpoint and bgwriter will be in the list, discount them */ - if (CheckPointPID != 0) - cnt--; - if (BgWriterPID != 0) - cnt--; + cnt++; + } return cnt; } /* - * SSDataBase -- start a non-backend child process for the postmaster + * StartChildProcess -- start a non-backend child process for the postmaster * * xlog determines what kind of child will be started. All child types * initially go to BootstrapMain, which will handle common setup. * - * Return value of SSDataBase is subprocess' PID, or 0 if failed to start - * subprocess (0 is returned only for checkpoint/bgwriter cases). + * Return value of StartChildProcess is subprocess' PID, or 0 if failed + * to start subprocess. */ static pid_t -SSDataBase(int xlop) +StartChildProcess(int xlop) { - Backend *bn; pid_t pid; char *av[10]; int ac = 0; @@ -3153,7 +3026,7 @@ SSDataBase(int xlop) IsUnderPostmaster = true; /* we are a postmaster subprocess now */ /* Close the postmaster's sockets */ - ClosePostmasterPorts(true); + ClosePostmasterPorts(); /* Lose the postmaster's on-exit routines and port connections */ on_exit_reset(); @@ -3180,17 +3053,9 @@ SSDataBase(int xlop) ereport(LOG, (errmsg("could not fork startup process: %m"))); break; - case BS_XLOG_CHECKPOINT: - ereport(LOG, - (errmsg("could not fork checkpoint process: %m"))); - break; case BS_XLOG_BGWRITER: ereport(LOG, - (errmsg("could not fork bgwriter process: %m"))); - break; - case BS_XLOG_SHUTDOWN: - ereport(LOG, - (errmsg("could not fork shutdown process: %m"))); + (errmsg("could not fork background writer process: %m"))); break; default: ereport(LOG, @@ -3199,50 +3064,17 @@ SSDataBase(int xlop) } /* - * fork failure is fatal during startup/shutdown, but there's no - * need to choke if a routine checkpoint or starting a background - * writer fails. + * fork failure is fatal during startup, but there's no need + * to choke immediately if starting other child types fails. */ - if (xlop == BS_XLOG_CHECKPOINT) - return 0; - if (xlop == BS_XLOG_BGWRITER) - return 0; - ExitPostmaster(1); + if (xlop == BS_XLOG_STARTUP) + ExitPostmaster(1); + return 0; } /* * in parent, successful fork - * - * The startup and shutdown processes are not considered normal - * backends, but the checkpoint and bgwriter processes are. They must - * be added to the list of backends. */ - if (xlop == BS_XLOG_CHECKPOINT || xlop == BS_XLOG_BGWRITER) - { - if (!(bn = (Backend *) malloc(sizeof(Backend)))) - { - ereport(LOG, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - ExitPostmaster(1); - } - - bn->pid = pid; - bn->cancel_key = PostmasterRandom(); -#ifdef EXEC_BACKEND - ShmemBackendArrayAdd(bn); -#endif - DLAddHead(BackendList, DLNewElem(bn)); - - /* - * Since this code is executed periodically, it's a fine place to - * do other actions that should happen every now and then on no - * particular schedule. Such as... - */ - TouchSocketFile(); - TouchSocketLockFile(); - } - return pid; } @@ -3316,6 +3148,8 @@ extern int pgStatSock; #define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp) #define read_var(var,fp) fread((void*)&(var),sizeof(var),1,fp) +#define write_array_var(var,fp) fwrite((void*)(var),sizeof(var),1,fp) +#define read_array_var(var,fp) fread((void*)(var),sizeof(var),1,fp) static bool write_backend_variables(char *filename, Port *port) @@ -3326,9 +3160,9 @@ write_backend_variables(char *filename, Port *port) /* Calculate name for temp file in caller's buffer */ Assert(DataDir); - snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%lu", + snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu", DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX, - ++tmpBackendFileNum); + MyProcPid, ++tmpBackendFileNum); /* Open file */ fp = AllocateFile(filename, PG_BINARY_W); @@ -3366,7 +3200,9 @@ write_backend_variables(char *filename, Port *port) */ StrNCpy(str_buf, DataDir, MAXPGPATH); - fwrite((void *) str_buf, MAXPGPATH, 1, fp); + write_array_var(str_buf, fp); + + write_array_var(ListenSocket, fp); write_var(MyCancelKey, fp); @@ -3386,14 +3222,15 @@ write_backend_variables(char *filename, Port *port) write_var(debug_flag, fp); write_var(PostmasterPid, fp); - fwrite((void *) my_exec_path, MAXPGPATH, 1, fp); + StrNCpy(str_buf, my_exec_path, MAXPGPATH); + write_array_var(str_buf, fp); - fwrite((void *) ExtraOptions, sizeof(ExtraOptions), 1, fp); + write_array_var(ExtraOptions, fp); StrNCpy(str_buf, setlocale(LC_COLLATE, NULL), MAXPGPATH); - fwrite((void *) str_buf, MAXPGPATH, 1, fp); + write_array_var(str_buf, fp); StrNCpy(str_buf, setlocale(LC_CTYPE, NULL), MAXPGPATH); - fwrite((void *) str_buf, MAXPGPATH, 1, fp); + write_array_var(str_buf, fp); /* Release file */ if (FreeFile(fp)) @@ -3430,9 +3267,11 @@ read_backend_variables(char *filename, Port *port) read_var(port->cryptSalt, fp); read_var(port->md5Salt, fp); - fread((void *) str_buf, MAXPGPATH, 1, fp); + read_array_var(str_buf, fp); SetDataDir(str_buf); + read_array_var(ListenSocket, fp); + read_var(MyCancelKey, fp); read_var(UsedShmemSegID, fp); @@ -3451,13 +3290,14 @@ read_backend_variables(char *filename, Port *port) read_var(debug_flag, fp); read_var(PostmasterPid, fp); - fread((void *) my_exec_path, MAXPGPATH, 1, fp); + read_array_var(str_buf, fp); + StrNCpy(my_exec_path, str_buf, MAXPGPATH); - fread((void *) ExtraOptions, sizeof(ExtraOptions), 1, fp); + read_array_var(ExtraOptions, fp); - fread((void *) str_buf, MAXPGPATH, 1, fp); + read_array_var(str_buf, fp); setlocale(LC_COLLATE, str_buf); - fread((void *) str_buf, MAXPGPATH, 1, fp); + read_array_var(str_buf, fp); setlocale(LC_CTYPE, str_buf); /* Release file */ @@ -3481,6 +3321,7 @@ ShmemBackendArrayAllocation(void) size_t size = ShmemBackendArraySize(); ShmemBackendArray = (Backend *) ShmemAlloc(size); + /* Mark all slots as empty */ memset(ShmemBackendArray, 0, size); } @@ -3489,9 +3330,9 @@ ShmemBackendArrayAdd(Backend *bn) { int i; + /* Find an empty slot */ for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++) { - /* Find an empty slot */ if (ShmemBackendArray[i].pid == 0) { ShmemBackendArray[i] = *bn; @@ -3500,7 +3341,7 @@ ShmemBackendArrayAdd(Backend *bn) } ereport(FATAL, - (errmsg_internal("unable to add backend entry"))); + (errmsg_internal("no free slots in shmem backend array"))); } static void @@ -3597,13 +3438,14 @@ win32_forkexec(const char *path, char *argv[]) } /* - * Note: The following three functions must not be interrupted (eg. by signals). - * As the Postgres Win32 signalling architecture (currently) requires polling, - * or APC checking functions which aren't used here, this is not an issue. + * Note: The following three functions must not be interrupted (eg. by + * signals). As the Postgres Win32 signalling architecture (currently) + * requires polling, or APC checking functions which aren't used here, this + * is not an issue. * - * We keep two separate arrays, instead of a single array of pid/HANDLE structs, - * to avoid having to re-create a handle array for WaitForMultipleObjects on - * each call to win32_waitpid. + * We keep two separate arrays, instead of a single array of pid/HANDLE + * structs, to avoid having to re-create a handle array for + * WaitForMultipleObjects on each call to win32_waitpid. */ static void @@ -3662,15 +3504,17 @@ win32_waitpid(int *exitstatus) */ int index; DWORD exitCode; - DWORD ret = WaitForMultipleObjects(win32_numChildren, win32_childHNDArray, FALSE, 0); + DWORD ret; + ret = WaitForMultipleObjects(win32_numChildren, win32_childHNDArray, + FALSE, 0); switch (ret) { case WAIT_FAILED: - ereport(ERROR, - (errmsg_internal("failed to wait on %lu children: %i", + ereport(LOG, + (errmsg_internal("failed to wait on %lu children: %d", win32_numChildren, (int) GetLastError()))); - /* Fall through to WAIT_TIMEOUTs return */ + return -1; case WAIT_TIMEOUT: /* No children have finished */ @@ -3685,7 +3529,7 @@ win32_waitpid(int *exitstatus) index = ret - WAIT_OBJECT_0; Assert(index >= 0 && index < win32_numChildren); if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode)) - + { /* * If we get this far, this should never happen, but, * then again... No choice other than to assume a @@ -3694,6 +3538,7 @@ win32_waitpid(int *exitstatus) ereport(FATAL, (errmsg_internal("failed to get exit code for child %lu", win32_childPIDArray[index]))); + } *exitstatus = (int) exitCode; return win32_childPIDArray[index]; } @@ -3703,8 +3548,10 @@ win32_waitpid(int *exitstatus) return -1; } -/* Note! Code belows executes on separate threads, one for - each child process created */ +/* + * Note! Code below executes on separate threads, one for + * each child process created + */ static DWORD WINAPI win32_sigchld_waiter(LPVOID param) { |
