diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/backend/access/transam/Makefile | 3 | ||||
| -rw-r--r-- | src/backend/access/transam/timeline.c | 378 | ||||
| -rw-r--r-- | src/backend/access/transam/xlog.c | 988 | ||||
| -rw-r--r-- | src/backend/access/transam/xlogarchive.c | 572 | ||||
| -rw-r--r-- | src/include/access/timeline.h | 23 | ||||
| -rw-r--r-- | src/include/access/xlog_internal.h | 23 | 
6 files changed, 1058 insertions, 929 deletions
| diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile index f82f10e989e..700cfd85c08 100644 --- a/src/backend/access/transam/Makefile +++ b/src/backend/access/transam/Makefile @@ -13,7 +13,8 @@ top_builddir = ../../../..  include $(top_builddir)/src/Makefile.global  OBJS = clog.o transam.o varsup.o xact.o rmgr.o slru.o subtrans.o multixact.o \ -	twophase.o twophase_rmgr.o xlog.o xlogfuncs.o xlogutils.o +	timeline.o twophase.o twophase_rmgr.o xlog.o xlogarchive.o xlogfuncs.o \ +	xlogutils.o  include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c new file mode 100644 index 00000000000..89d31c5a1bf --- /dev/null +++ b/src/backend/access/transam/timeline.c @@ -0,0 +1,378 @@ +/*------------------------------------------------------------------------- + * + * timeline.c + *		Functions for reading and writing timeline history files. + * + * A timeline history file lists the timeline changes of the timeline, in + * a simple text format. They are archived along with the WAL segments. + * + * The files are named like "<WAL segment>.history". For example, if the + * database starts up and switches to timeline 5, while processing WAL + * segment 000000030000002A00000006 (the old timeline was 3), the timeline + * history file would be called "000000050000002A00000006.history". + * + * Each line in the file represents a timeline switch: + * + * <parentTLI> <xlogfname> <reason> + * + *	parentTLI	ID of the parent timeline + *	xlogfname	filename of the WAL segment where the switch happened + *	reason		human-readable explanation of why the timeline was changed + * + * The fields are separated by tabs. Lines beginning with # are comments, and + * are ignored. Empty lines are also ignored. 
+ * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/transam/timeline.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <stdio.h> +#include <unistd.h> + +#include "access/timeline.h" +#include "access/xlog_internal.h" +#include "access/xlogdefs.h" +#include "storage/fd.h" + +/* + * Try to read a timeline's history file. + * + * If successful, return the list of component TLIs (the given TLI followed by + * its ancestor TLIs).	If we can't find the history file, assume that the + * timeline has no parents, and return a list of just the specified timeline + * ID. + */ +List * +readTimeLineHistory(TimeLineID targetTLI) +{ +	List	   *result; +	char		path[MAXPGPATH]; +	char		histfname[MAXFNAMELEN]; +	char		fline[MAXPGPATH]; +	FILE	   *fd; + +	/* Timeline 1 does not have a history file, so no need to check */ +	if (targetTLI == 1) +		return list_make1_int((int) targetTLI); + +	if (InArchiveRecovery) +	{ +		TLHistoryFileName(histfname, targetTLI); +		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); +	} +	else +		TLHistoryFilePath(path, targetTLI); + +	fd = AllocateFile(path, "r"); +	if (fd == NULL) +	{ +		if (errno != ENOENT) +			ereport(FATAL, +					(errcode_for_file_access(), +					 errmsg("could not open file \"%s\": %m", path))); +		/* Not there, so assume no parents */ +		return list_make1_int((int) targetTLI); +	} + +	result = NIL; + +	/* +	 * Parse the file... 
+	 */ +	while (fgets(fline, sizeof(fline), fd) != NULL) +	{ +		/* skip leading whitespace and check for # comment */ +		char	   *ptr; +		char	   *endptr; +		TimeLineID	tli; + +		for (ptr = fline; *ptr; ptr++) +		{ +			if (!isspace((unsigned char) *ptr)) +				break; +		} +		if (*ptr == '\0' || *ptr == '#') +			continue; + +		/* expect a numeric timeline ID as first field of line */ +		tli = (TimeLineID) strtoul(ptr, &endptr, 0); +		if (endptr == ptr) +			ereport(FATAL, +					(errmsg("syntax error in history file: %s", fline), +					 errhint("Expected a numeric timeline ID."))); + +		if (result && +			tli <= (TimeLineID) linitial_int(result)) +			ereport(FATAL, +					(errmsg("invalid data in history file: %s", fline), +				   errhint("Timeline IDs must be in increasing sequence."))); + +		/* Build list with newest item first */ +		result = lcons_int((int) tli, result); + +		/* we ignore the remainder of each line */ +	} + +	FreeFile(fd); + +	if (result && +		targetTLI <= (TimeLineID) linitial_int(result)) +		ereport(FATAL, +				(errmsg("invalid data in history file \"%s\"", path), +			errhint("Timeline IDs must be less than child timeline's ID."))); + +	result = lcons_int((int) targetTLI, result); + +	ereport(DEBUG3, +			(errmsg_internal("history of timeline %u is %s", +							 targetTLI, nodeToString(result)))); + +	return result; +} + +/* + * Probe whether a timeline history file exists for the given timeline ID + */ +bool +existsTimeLineHistory(TimeLineID probeTLI) +{ +	char		path[MAXPGPATH]; +	char		histfname[MAXFNAMELEN]; +	FILE	   *fd; + +	/* Timeline 1 does not have a history file, so no need to check */ +	if (probeTLI == 1) +		return false; + +	if (InArchiveRecovery) +	{ +		TLHistoryFileName(histfname, probeTLI); +		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); +	} +	else +		TLHistoryFilePath(path, probeTLI); + +	fd = AllocateFile(path, "r"); +	if (fd != NULL) +	{ +		FreeFile(fd); +		return true; +	} +	else +	{ +		if (errno != ENOENT) +			
ereport(FATAL, +					(errcode_for_file_access(), +					 errmsg("could not open file \"%s\": %m", path))); +		return false; +	} +} + +/* + * Find the newest existing timeline, assuming that startTLI exists. + * + * Note: while this is somewhat heuristic, it does positively guarantee + * that (result + 1) is not a known timeline, and therefore it should + * be safe to assign that ID to a new timeline. + */ +TimeLineID +findNewestTimeLine(TimeLineID startTLI) +{ +	TimeLineID	newestTLI; +	TimeLineID	probeTLI; + +	/* +	 * The algorithm is just to probe for the existence of timeline history +	 * files.  XXX is it useful to allow gaps in the sequence? +	 */ +	newestTLI = startTLI; + +	for (probeTLI = startTLI + 1;; probeTLI++) +	{ +		if (existsTimeLineHistory(probeTLI)) +		{ +			newestTLI = probeTLI;		/* probeTLI exists */ +		} +		else +		{ +			/* doesn't exist, assume we're done */ +			break; +		} +	} + +	return newestTLI; +} + +/* + * Create a new timeline history file. + * + *	newTLI: ID of the new timeline + *	parentTLI: ID of its immediate parent + *	endTLI et al: ID of the last used WAL file, for annotation purposes + *	reason: human-readable explanation of why the timeline was switched + * + * Currently this is only used at the end recovery, and so there are no locking + * considerations.	But we should be just as tense as XLogFileInit to avoid + * emplacing a bogus file. + */ +void +writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, +					 TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason) +{ +	char		path[MAXPGPATH]; +	char		tmppath[MAXPGPATH]; +	char		histfname[MAXFNAMELEN]; +	char		xlogfname[MAXFNAMELEN]; +	char		buffer[BLCKSZ]; +	int			srcfd; +	int			fd; +	int			nbytes; + +	Assert(newTLI > parentTLI); /* else bad selection of newTLI */ + +	/* +	 * Write into a temp file name. 
+	 */ +	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid()); + +	unlink(tmppath); + +	/* do not use get_sync_bit() here --- want to fsync only at end of fill */ +	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL, +					   S_IRUSR | S_IWUSR); +	if (fd < 0) +		ereport(ERROR, +				(errcode_for_file_access(), +				 errmsg("could not create file \"%s\": %m", tmppath))); + +	/* +	 * If a history file exists for the parent, copy it verbatim +	 */ +	if (InArchiveRecovery) +	{ +		TLHistoryFileName(histfname, parentTLI); +		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); +	} +	else +		TLHistoryFilePath(path, parentTLI); + +	srcfd = BasicOpenFile(path, O_RDONLY, 0); +	if (srcfd < 0) +	{ +		if (errno != ENOENT) +			ereport(ERROR, +					(errcode_for_file_access(), +					 errmsg("could not open file \"%s\": %m", path))); +		/* Not there, so assume parent has no parents */ +	} +	else +	{ +		for (;;) +		{ +			errno = 0; +			nbytes = (int) read(srcfd, buffer, sizeof(buffer)); +			if (nbytes < 0 || errno != 0) +				ereport(ERROR, +						(errcode_for_file_access(), +						 errmsg("could not read file \"%s\": %m", path))); +			if (nbytes == 0) +				break; +			errno = 0; +			if ((int) write(fd, buffer, nbytes) != nbytes) +			{ +				int			save_errno = errno; + +				/* +				 * If we fail to make the file, delete it to release disk +				 * space +				 */ +				unlink(tmppath); + +				/* +				 * if write didn't set errno, assume problem is no disk space +				 */ +				errno = save_errno ? save_errno : ENOSPC; + +				ereport(ERROR, +						(errcode_for_file_access(), +					 errmsg("could not write to file \"%s\": %m", tmppath))); +			} +		} +		close(srcfd); +	} + +	/* +	 * Append one line with the details of this timeline split. +	 * +	 * If we did have a parent file, insert an extra newline just in case the +	 * parent file failed to end with one. 
+	 */ +	XLogFileName(xlogfname, endTLI, endLogSegNo); + +	snprintf(buffer, sizeof(buffer), +			 "%s%u\t%s\t%s\n", +			 (srcfd < 0) ? "" : "\n", +			 parentTLI, +			 xlogfname, +			 reason); + +	nbytes = strlen(buffer); +	errno = 0; +	if ((int) write(fd, buffer, nbytes) != nbytes) +	{ +		int			save_errno = errno; + +		/* +		 * If we fail to make the file, delete it to release disk space +		 */ +		unlink(tmppath); +		/* if write didn't set errno, assume problem is no disk space */ +		errno = save_errno ? save_errno : ENOSPC; + +		ereport(ERROR, +				(errcode_for_file_access(), +				 errmsg("could not write to file \"%s\": %m", tmppath))); +	} + +	if (pg_fsync(fd) != 0) +		ereport(ERROR, +				(errcode_for_file_access(), +				 errmsg("could not fsync file \"%s\": %m", tmppath))); + +	if (close(fd)) +		ereport(ERROR, +				(errcode_for_file_access(), +				 errmsg("could not close file \"%s\": %m", tmppath))); + + +	/* +	 * Now move the completed history file into place with its final name. +	 */ +	TLHistoryFilePath(path, newTLI); + +	/* +	 * Prefer link() to rename() here just to be really sure that we don't +	 * overwrite an existing logfile.  However, there shouldn't be one, so +	 * rename() is an acceptable substitute except for the truly paranoid. 
+	 */ +#if HAVE_WORKING_LINK +	if (link(tmppath, path) < 0) +		ereport(ERROR, +				(errcode_for_file_access(), +				 errmsg("could not link file \"%s\" to \"%s\": %m", +						tmppath, path))); +	unlink(tmppath); +#else +	if (rename(tmppath, path) < 0) +		ereport(ERROR, +				(errcode_for_file_access(), +				 errmsg("could not rename file \"%s\" to \"%s\": %m", +						tmppath, path))); +#endif +} diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ff56c26ab4c..b3519b04b33 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -26,6 +26,7 @@  #include "access/clog.h"  #include "access/multixact.h"  #include "access/subtrans.h" +#include "access/timeline.h"  #include "access/transam.h"  #include "access/tuptoaster.h"  #include "access/twophase.h" @@ -190,14 +191,14 @@ static bool LocalHotStandbyActive = false;   */  static int	LocalXLogInsertAllowed = -1; -/* Are we recovering using offline XLOG archives? */ -static bool InArchiveRecovery = false; +/* Are we recovering using offline XLOG archives? (only valid in the startup process) */ +bool InArchiveRecovery = false;  /* Was the last xlog file restored from archive, or local? 
*/  static bool restoredFromArchive = false;  /* options taken from recovery.conf for archive recovery */ -static char *recoveryRestoreCommand = NULL; +char *recoveryRestoreCommand = NULL;  static char *recoveryEndCommand = NULL;  static char *archiveCleanupCommand = NULL;  static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET; @@ -208,7 +209,7 @@ static TimestampTz recoveryTargetTime;  static char *recoveryTargetName;  /* options taken from recovery.conf for XLOG streaming */ -static bool StandbyMode = false; +bool StandbyMode = false;  static char *PrimaryConnInfo = NULL;  static char *TriggerFile = NULL; @@ -604,11 +605,6 @@ typedef struct xl_restore_point  } xl_restore_point; -static void XLogArchiveNotify(const char *xlog); -static void XLogArchiveNotifySeg(XLogSegNo segno); -static bool XLogArchiveCheckDone(const char *xlog); -static bool XLogArchiveIsBusy(const char *xlog); -static void XLogArchiveCleanup(const char *xlog);  static void readRecoveryCommandFile(void);  static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);  static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); @@ -636,10 +632,6 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,  			 bool randAccess);  static int	emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);  static void XLogFileClose(void); -static bool RestoreArchivedFile(char *path, const char *xlogfname, -					const char *recovername, off_t expectedSize); -static void ExecuteRecoveryCommand(char *command, char *commandName, -					   bool failOnerror);  static void PreallocXlogFiles(XLogRecPtr endptr);  static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr);  static void UpdateLastRemovedPtr(char *filename); @@ -652,12 +644,7 @@ static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode);  static bool ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record,  					  int emode, bool randAccess);  static XLogRecord 
*ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt); -static List *readTimeLineHistory(TimeLineID targetTLI); -static bool existsTimeLineHistory(TimeLineID probeTLI);  static bool rescanLatestTimeLine(void); -static TimeLineID findNewestTimeLine(TimeLineID startTLI); -static void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, -					 TimeLineID endTLI, XLogSegNo endLogSegNo);  static void WriteControlFile(void);  static void ReadControlFile(void);  static char *str_time(pg_time_t tnow); @@ -1279,168 +1266,6 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites,  }  /* - * XLogArchiveNotify - * - * Create an archive notification file - * - * The name of the notification file is the message that will be picked up - * by the archiver, e.g. we write 0000000100000001000000C6.ready - * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6, - * then when complete, rename it to 0000000100000001000000C6.done - */ -static void -XLogArchiveNotify(const char *xlog) -{ -	char		archiveStatusPath[MAXPGPATH]; -	FILE	   *fd; - -	/* insert an otherwise empty file called <XLOG>.ready */ -	StatusFilePath(archiveStatusPath, xlog, ".ready"); -	fd = AllocateFile(archiveStatusPath, "w"); -	if (fd == NULL) -	{ -		ereport(LOG, -				(errcode_for_file_access(), -				 errmsg("could not create archive status file \"%s\": %m", -						archiveStatusPath))); -		return; -	} -	if (FreeFile(fd)) -	{ -		ereport(LOG, -				(errcode_for_file_access(), -				 errmsg("could not write archive status file \"%s\": %m", -						archiveStatusPath))); -		return; -	} - -	/* Notify archiver that it's got something to do */ -	if (IsUnderPostmaster) -		SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER); -} - -/* - * Convenience routine to notify using segment number representation of filename - */ -static void -XLogArchiveNotifySeg(XLogSegNo segno) -{ -	char		xlog[MAXFNAMELEN]; - -	XLogFileName(xlog, ThisTimeLineID, segno); -	XLogArchiveNotify(xlog); -} - -/* - * XLogArchiveCheckDone 
- * - * This is called when we are ready to delete or recycle an old XLOG segment - * file or backup history file.  If it is okay to delete it then return true. - * If it is not time to delete it, make sure a .ready file exists, and return - * false. - * - * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists, - * then return false; else create <XLOG>.ready and return false. - * - * The reason we do things this way is so that if the original attempt to - * create <XLOG>.ready fails, we'll retry during subsequent checkpoints. - */ -static bool -XLogArchiveCheckDone(const char *xlog) -{ -	char		archiveStatusPath[MAXPGPATH]; -	struct stat stat_buf; - -	/* Always deletable if archiving is off */ -	if (!XLogArchivingActive()) -		return true; - -	/* First check for .done --- this means archiver is done with it */ -	StatusFilePath(archiveStatusPath, xlog, ".done"); -	if (stat(archiveStatusPath, &stat_buf) == 0) -		return true; - -	/* check for .ready --- this means archiver is still busy with it */ -	StatusFilePath(archiveStatusPath, xlog, ".ready"); -	if (stat(archiveStatusPath, &stat_buf) == 0) -		return false; - -	/* Race condition --- maybe archiver just finished, so recheck */ -	StatusFilePath(archiveStatusPath, xlog, ".done"); -	if (stat(archiveStatusPath, &stat_buf) == 0) -		return true; - -	/* Retry creation of the .ready file */ -	XLogArchiveNotify(xlog); -	return false; -} - -/* - * XLogArchiveIsBusy - * - * Check to see if an XLOG segment file is still unarchived. - * This is almost but not quite the inverse of XLogArchiveCheckDone: in - * the first place we aren't chartered to recreate the .ready file, and - * in the second place we should consider that if the file is already gone - * then it's not busy.  (This check is needed to handle the race condition - * that a checkpoint already deleted the no-longer-needed file.) 
- */ -static bool -XLogArchiveIsBusy(const char *xlog) -{ -	char		archiveStatusPath[MAXPGPATH]; -	struct stat stat_buf; - -	/* First check for .done --- this means archiver is done with it */ -	StatusFilePath(archiveStatusPath, xlog, ".done"); -	if (stat(archiveStatusPath, &stat_buf) == 0) -		return false; - -	/* check for .ready --- this means archiver is still busy with it */ -	StatusFilePath(archiveStatusPath, xlog, ".ready"); -	if (stat(archiveStatusPath, &stat_buf) == 0) -		return true; - -	/* Race condition --- maybe archiver just finished, so recheck */ -	StatusFilePath(archiveStatusPath, xlog, ".done"); -	if (stat(archiveStatusPath, &stat_buf) == 0) -		return false; - -	/* -	 * Check to see if the WAL file has been removed by checkpoint, which -	 * implies it has already been archived, and explains why we can't see a -	 * status file for it. -	 */ -	snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog); -	if (stat(archiveStatusPath, &stat_buf) != 0 && -		errno == ENOENT) -		return false; - -	return true; -} - -/* - * XLogArchiveCleanup - * - * Cleanup archive notification file(s) for a particular xlog segment - */ -static void -XLogArchiveCleanup(const char *xlog) -{ -	char		archiveStatusPath[MAXPGPATH]; - -	/* Remove the .done file */ -	StatusFilePath(archiveStatusPath, xlog, ".done"); -	unlink(archiveStatusPath); -	/* should we complain about failure? */ - -	/* Remove the .ready file if present --- normally it shouldn't be */ -	StatusFilePath(archiveStatusPath, xlog, ".ready"); -	unlink(archiveStatusPath); -	/* should we complain about failure? */ -} - -/*   * Advance the Insert state to the next buffer page, writing out the next   * buffer if it still contains unwritten data.   * @@ -2955,386 +2780,6 @@ XLogFileClose(void)  }  /* - * Attempt to retrieve the specified file from off-line archival storage. 
- * If successful, fill "path" with its complete path (note that this will be - * a temp file name that doesn't follow the normal naming convention), and - * return TRUE. - * - * If not successful, fill "path" with the name of the normal on-line file - * (which may or may not actually exist, but we'll try to use it), and return - * FALSE. - * - * For fixed-size files, the caller may pass the expected size as an - * additional crosscheck on successful recovery.  If the file size is not - * known, set expectedSize = 0. - */ -static bool -RestoreArchivedFile(char *path, const char *xlogfname, -					const char *recovername, off_t expectedSize) -{ -	char		xlogpath[MAXPGPATH]; -	char		xlogRestoreCmd[MAXPGPATH]; -	char		lastRestartPointFname[MAXPGPATH]; -	char	   *dp; -	char	   *endp; -	const char *sp; -	int			rc; -	bool		signaled; -	struct stat stat_buf; -	XLogSegNo	restartSegNo; - -	/* In standby mode, restore_command might not be supplied */ -	if (recoveryRestoreCommand == NULL) -		goto not_available; - -	/* -	 * When doing archive recovery, we always prefer an archived log file even -	 * if a file of the same name exists in XLOGDIR.  The reason is that the -	 * file in XLOGDIR could be an old, un-filled or partly-filled version -	 * that was copied and restored as part of backing up $PGDATA. -	 * -	 * We could try to optimize this slightly by checking the local copy -	 * lastchange timestamp against the archived copy, but we have no API to -	 * do this, nor can we guarantee that the lastchange timestamp was -	 * preserved correctly when we copied to archive. Our aim is robustness, -	 * so we elect not to do this. -	 * -	 * If we cannot obtain the log file from the archive, however, we will try -	 * to use the XLOGDIR file if it exists.  This is so that we can make use -	 * of log segments that weren't yet transferred to the archive. 
-	 * -	 * Notice that we don't actually overwrite any files when we copy back -	 * from archive because the recoveryRestoreCommand may inadvertently -	 * restore inappropriate xlogs, or they may be corrupt, so we may wish to -	 * fallback to the segments remaining in current XLOGDIR later. The -	 * copy-from-archive filename is always the same, ensuring that we don't -	 * run out of disk space on long recoveries. -	 */ -	snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername); - -	/* -	 * Make sure there is no existing file named recovername. -	 */ -	if (stat(xlogpath, &stat_buf) != 0) -	{ -		if (errno != ENOENT) -			ereport(FATAL, -					(errcode_for_file_access(), -					 errmsg("could not stat file \"%s\": %m", -							xlogpath))); -	} -	else -	{ -		if (unlink(xlogpath) != 0) -			ereport(FATAL, -					(errcode_for_file_access(), -					 errmsg("could not remove file \"%s\": %m", -							xlogpath))); -	} - -	/* -	 * Calculate the archive file cutoff point for use during log shipping -	 * replication. All files earlier than this point can be deleted from the -	 * archive, though there is no requirement to do so. -	 * -	 * We initialise this with the filename of an InvalidXLogRecPtr, which -	 * will prevent the deletion of any WAL files from the archive because of -	 * the alphabetic sorting property of WAL filenames. -	 * -	 * Once we have successfully located the redo pointer of the checkpoint -	 * from which we start recovery we never request a file prior to the redo -	 * pointer of the last restartpoint. When redo begins we know that we have -	 * successfully located it, so there is no need for additional status -	 * flags to signify the point when we can begin deleting WAL files from -	 * the archive. 
-	 */ -	if (InRedo) -	{ -		XLByteToSeg(ControlFile->checkPointCopy.redo, restartSegNo); -		XLogFileName(lastRestartPointFname, -					 ControlFile->checkPointCopy.ThisTimeLineID, -					 restartSegNo); -		/* we shouldn't need anything earlier than last restart point */ -		Assert(strcmp(lastRestartPointFname, xlogfname) <= 0); -	} -	else -		XLogFileName(lastRestartPointFname, 0, 0L); - -	/* -	 * construct the command to be executed -	 */ -	dp = xlogRestoreCmd; -	endp = xlogRestoreCmd + MAXPGPATH - 1; -	*endp = '\0'; - -	for (sp = recoveryRestoreCommand; *sp; sp++) -	{ -		if (*sp == '%') -		{ -			switch (sp[1]) -			{ -				case 'p': -					/* %p: relative path of target file */ -					sp++; -					StrNCpy(dp, xlogpath, endp - dp); -					make_native_path(dp); -					dp += strlen(dp); -					break; -				case 'f': -					/* %f: filename of desired file */ -					sp++; -					StrNCpy(dp, xlogfname, endp - dp); -					dp += strlen(dp); -					break; -				case 'r': -					/* %r: filename of last restartpoint */ -					sp++; -					StrNCpy(dp, lastRestartPointFname, endp - dp); -					dp += strlen(dp); -					break; -				case '%': -					/* convert %% to a single % */ -					sp++; -					if (dp < endp) -						*dp++ = *sp; -					break; -				default: -					/* otherwise treat the % as not special */ -					if (dp < endp) -						*dp++ = *sp; -					break; -			} -		} -		else -		{ -			if (dp < endp) -				*dp++ = *sp; -		} -	} -	*dp = '\0'; - -	ereport(DEBUG3, -			(errmsg_internal("executing restore command \"%s\"", -							 xlogRestoreCmd))); - -	/* -	 * Check signals before restore command and reset afterwards. -	 */ -	PreRestoreCommand(); - -	/* -	 * Copy xlog from archival storage to XLOGDIR -	 */ -	rc = system(xlogRestoreCmd); - -	PostRestoreCommand(); - -	if (rc == 0) -	{ -		/* -		 * command apparently succeeded, but let's make sure the file is -		 * really there now and has the correct size. 
-		 */ -		if (stat(xlogpath, &stat_buf) == 0) -		{ -			if (expectedSize > 0 && stat_buf.st_size != expectedSize) -			{ -				int			elevel; - -				/* -				 * If we find a partial file in standby mode, we assume it's -				 * because it's just being copied to the archive, and keep -				 * trying. -				 * -				 * Otherwise treat a wrong-sized file as FATAL to ensure the -				 * DBA would notice it, but is that too strong? We could try -				 * to plow ahead with a local copy of the file ... but the -				 * problem is that there probably isn't one, and we'd -				 * incorrectly conclude we've reached the end of WAL and we're -				 * done recovering ... -				 */ -				if (StandbyMode && stat_buf.st_size < expectedSize) -					elevel = DEBUG1; -				else -					elevel = FATAL; -				ereport(elevel, -						(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu", -								xlogfname, -								(unsigned long) stat_buf.st_size, -								(unsigned long) expectedSize))); -				return false; -			} -			else -			{ -				ereport(LOG, -						(errmsg("restored log file \"%s\" from archive", -								xlogfname))); -				strcpy(path, xlogpath); -				return true; -			} -		} -		else -		{ -			/* stat failed */ -			if (errno != ENOENT) -				ereport(FATAL, -						(errcode_for_file_access(), -						 errmsg("could not stat file \"%s\": %m", -								xlogpath))); -		} -	} - -	/* -	 * Remember, we rollforward UNTIL the restore fails so failure here is -	 * just part of the process... that makes it difficult to determine -	 * whether the restore failed because there isn't an archive to restore, -	 * or because the administrator has specified the restore program -	 * incorrectly.  We have to assume the former. -	 * -	 * However, if the failure was due to any sort of signal, it's best to -	 * punt and abort recovery.  (If we "return false" here, upper levels will -	 * assume that recovery is complete and start up the database!) 
It's -	 * essential to abort on child SIGINT and SIGQUIT, because per spec -	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of -	 * those it's a good bet we should have gotten it too. -	 * -	 * On SIGTERM, assume we have received a fast shutdown request, and exit -	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the -	 * child process. If we receive it first, the signal handler will call -	 * proc_exit, otherwise we do it here. If we or the child process received -	 * SIGTERM for any other reason than a fast shutdown request, postmaster -	 * will perform an immediate shutdown when it sees us exiting -	 * unexpectedly. -	 * -	 * Per the Single Unix Spec, shells report exit status > 128 when a called -	 * command died on a signal.  Also, 126 and 127 are used to report -	 * problems such as an unfindable command; treat those as fatal errors -	 * too. -	 */ -	if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) -		proc_exit(1); - -	signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; - -	ereport(signaled ? FATAL : DEBUG2, -		(errmsg("could not restore file \"%s\" from archive: return code %d", -				xlogfname, rc))); - -not_available: - -	/* -	 * if an archived file is not available, there might still be a version of -	 * this file in XLOGDIR, so return that as the filename to open. -	 * -	 * In many recovery scenarios we expect this to fail also, but if so that -	 * just means we've reached the end of WAL. -	 */ -	snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname); -	return false; -} - -/* - * Attempt to execute an external shell command during recovery. - * - * 'command' is the shell command to be executed, 'commandName' is a - * human-readable name describing the command emitted in the logs. If - * 'failOnSignal' is true and the command is killed by a signal, a FATAL - * error is thrown. Otherwise a WARNING is emitted. - * - * This is currently used for recovery_end_command and archive_cleanup_command. 
- */ -static void -ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal) -{ -	char		xlogRecoveryCmd[MAXPGPATH]; -	char		lastRestartPointFname[MAXPGPATH]; -	char	   *dp; -	char	   *endp; -	const char *sp; -	int			rc; -	bool		signaled; -	XLogSegNo	restartSegNo; - -	Assert(command && commandName); - -	/* -	 * Calculate the archive file cutoff point for use during log shipping -	 * replication. All files earlier than this point can be deleted from the -	 * archive, though there is no requirement to do so. -	 */ -	LWLockAcquire(ControlFileLock, LW_SHARED); -	XLByteToSeg(ControlFile->checkPointCopy.redo, restartSegNo); -	XLogFileName(lastRestartPointFname, -				 ControlFile->checkPointCopy.ThisTimeLineID, -				 restartSegNo); -	LWLockRelease(ControlFileLock); - -	/* -	 * construct the command to be executed -	 */ -	dp = xlogRecoveryCmd; -	endp = xlogRecoveryCmd + MAXPGPATH - 1; -	*endp = '\0'; - -	for (sp = command; *sp; sp++) -	{ -		if (*sp == '%') -		{ -			switch (sp[1]) -			{ -				case 'r': -					/* %r: filename of last restartpoint */ -					sp++; -					StrNCpy(dp, lastRestartPointFname, endp - dp); -					dp += strlen(dp); -					break; -				case '%': -					/* convert %% to a single % */ -					sp++; -					if (dp < endp) -						*dp++ = *sp; -					break; -				default: -					/* otherwise treat the % as not special */ -					if (dp < endp) -						*dp++ = *sp; -					break; -			} -		} -		else -		{ -			if (dp < endp) -				*dp++ = *sp; -		} -	} -	*dp = '\0'; - -	ereport(DEBUG3, -			(errmsg_internal("executing %s \"%s\"", commandName, command))); - -	/* -	 * execute the constructed command -	 */ -	rc = system(xlogRecoveryCmd); -	if (rc != 0) -	{ -		/* -		 * If the failure was due to any sort of signal, it's best to punt and -		 * abort recovery. See also detailed comments on signals in -		 * RestoreArchivedFile(). -		 */ -		signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; - -		ereport((signaled && failOnSignal) ? 
FATAL : WARNING, -		/*------ -		   translator: First %s represents a recovery.conf parameter name like -		  "recovery_end_command", and the 2nd is the value of that parameter. */ -				(errmsg("%s \"%s\": return code %d", commandName, -						command, rc))); -	} -} - -/*   * Preallocate log files beyond the specified log endpoint.   *   * XXX this is currently extremely conservative, since it forces only one @@ -4305,140 +3750,6 @@ ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode,  }  /* - * Try to read a timeline's history file. - * - * If successful, return the list of component TLIs (the given TLI followed by - * its ancestor TLIs).	If we can't find the history file, assume that the - * timeline has no parents, and return a list of just the specified timeline - * ID. - */ -static List * -readTimeLineHistory(TimeLineID targetTLI) -{ -	List	   *result; -	char		path[MAXPGPATH]; -	char		histfname[MAXFNAMELEN]; -	char		fline[MAXPGPATH]; -	FILE	   *fd; - -	/* Timeline 1 does not have a history file, so no need to check */ -	if (targetTLI == 1) -		return list_make1_int((int) targetTLI); - -	if (InArchiveRecovery) -	{ -		TLHistoryFileName(histfname, targetTLI); -		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); -	} -	else -		TLHistoryFilePath(path, targetTLI); - -	fd = AllocateFile(path, "r"); -	if (fd == NULL) -	{ -		if (errno != ENOENT) -			ereport(FATAL, -					(errcode_for_file_access(), -					 errmsg("could not open file \"%s\": %m", path))); -		/* Not there, so assume no parents */ -		return list_make1_int((int) targetTLI); -	} - -	result = NIL; - -	/* -	 * Parse the file... 
-	 */ -	while (fgets(fline, sizeof(fline), fd) != NULL) -	{ -		/* skip leading whitespace and check for # comment */ -		char	   *ptr; -		char	   *endptr; -		TimeLineID	tli; - -		for (ptr = fline; *ptr; ptr++) -		{ -			if (!isspace((unsigned char) *ptr)) -				break; -		} -		if (*ptr == '\0' || *ptr == '#') -			continue; - -		/* expect a numeric timeline ID as first field of line */ -		tli = (TimeLineID) strtoul(ptr, &endptr, 0); -		if (endptr == ptr) -			ereport(FATAL, -					(errmsg("syntax error in history file: %s", fline), -					 errhint("Expected a numeric timeline ID."))); - -		if (result && -			tli <= (TimeLineID) linitial_int(result)) -			ereport(FATAL, -					(errmsg("invalid data in history file: %s", fline), -				   errhint("Timeline IDs must be in increasing sequence."))); - -		/* Build list with newest item first */ -		result = lcons_int((int) tli, result); - -		/* we ignore the remainder of each line */ -	} - -	FreeFile(fd); - -	if (result && -		targetTLI <= (TimeLineID) linitial_int(result)) -		ereport(FATAL, -				(errmsg("invalid data in history file \"%s\"", path), -			errhint("Timeline IDs must be less than child timeline's ID."))); - -	result = lcons_int((int) targetTLI, result); - -	ereport(DEBUG3, -			(errmsg_internal("history of timeline %u is %s", -							 targetTLI, nodeToString(result)))); - -	return result; -} - -/* - * Probe whether a timeline history file exists for the given timeline ID - */ -static bool -existsTimeLineHistory(TimeLineID probeTLI) -{ -	char		path[MAXPGPATH]; -	char		histfname[MAXFNAMELEN]; -	FILE	   *fd; - -	/* Timeline 1 does not have a history file, so no need to check */ -	if (probeTLI == 1) -		return false; - -	if (InArchiveRecovery) -	{ -		TLHistoryFileName(histfname, probeTLI); -		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); -	} -	else -		TLHistoryFilePath(path, probeTLI); - -	fd = AllocateFile(path, "r"); -	if (fd != NULL) -	{ -		FreeFile(fd); -		return true; -	} -	else -	{ -		if (errno != ENOENT) -			
ereport(FATAL, -					(errcode_for_file_access(), -					 errmsg("could not open file \"%s\": %m", path))); -		return false; -	} -} - -/*   * Scan for new timelines that might have appeared in the archive since we   * started recovery.   * @@ -4502,239 +3813,6 @@ rescanLatestTimeLine(void)  }  /* - * Find the newest existing timeline, assuming that startTLI exists. - * - * Note: while this is somewhat heuristic, it does positively guarantee - * that (result + 1) is not a known timeline, and therefore it should - * be safe to assign that ID to a new timeline. - */ -static TimeLineID -findNewestTimeLine(TimeLineID startTLI) -{ -	TimeLineID	newestTLI; -	TimeLineID	probeTLI; - -	/* -	 * The algorithm is just to probe for the existence of timeline history -	 * files.  XXX is it useful to allow gaps in the sequence? -	 */ -	newestTLI = startTLI; - -	for (probeTLI = startTLI + 1;; probeTLI++) -	{ -		if (existsTimeLineHistory(probeTLI)) -		{ -			newestTLI = probeTLI;		/* probeTLI exists */ -		} -		else -		{ -			/* doesn't exist, assume we're done */ -			break; -		} -	} - -	return newestTLI; -} - -/* - * Create a new timeline history file. - * - *	newTLI: ID of the new timeline - *	parentTLI: ID of its immediate parent - *	endTLI et al: ID of the last used WAL file, for annotation purposes - * - * Currently this is only used during recovery, and so there are no locking - * considerations.	But we should be just as tense as XLogFileInit to avoid - * emplacing a bogus file. - */ -static void -writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, -					 TimeLineID endTLI, XLogSegNo endLogSegNo) -{ -	char		path[MAXPGPATH]; -	char		tmppath[MAXPGPATH]; -	char		histfname[MAXFNAMELEN]; -	char		xlogfname[MAXFNAMELEN]; -	char		buffer[BLCKSZ]; -	int			srcfd; -	int			fd; -	int			nbytes; - -	Assert(newTLI > parentTLI); /* else bad selection of newTLI */ - -	/* -	 * Write into a temp file name. 
-	 */ -	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid()); - -	unlink(tmppath); - -	/* do not use get_sync_bit() here --- want to fsync only at end of fill */ -	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL, -					   S_IRUSR | S_IWUSR); -	if (fd < 0) -		ereport(ERROR, -				(errcode_for_file_access(), -				 errmsg("could not create file \"%s\": %m", tmppath))); - -	/* -	 * If a history file exists for the parent, copy it verbatim -	 */ -	if (InArchiveRecovery) -	{ -		TLHistoryFileName(histfname, parentTLI); -		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); -	} -	else -		TLHistoryFilePath(path, parentTLI); - -	srcfd = BasicOpenFile(path, O_RDONLY, 0); -	if (srcfd < 0) -	{ -		if (errno != ENOENT) -			ereport(ERROR, -					(errcode_for_file_access(), -					 errmsg("could not open file \"%s\": %m", path))); -		/* Not there, so assume parent has no parents */ -	} -	else -	{ -		for (;;) -		{ -			errno = 0; -			nbytes = (int) read(srcfd, buffer, sizeof(buffer)); -			if (nbytes < 0 || errno != 0) -				ereport(ERROR, -						(errcode_for_file_access(), -						 errmsg("could not read file \"%s\": %m", path))); -			if (nbytes == 0) -				break; -			errno = 0; -			if ((int) write(fd, buffer, nbytes) != nbytes) -			{ -				int			save_errno = errno; - -				/* -				 * If we fail to make the file, delete it to release disk -				 * space -				 */ -				unlink(tmppath); - -				/* -				 * if write didn't set errno, assume problem is no disk space -				 */ -				errno = save_errno ? save_errno : ENOSPC; - -				ereport(ERROR, -						(errcode_for_file_access(), -					 errmsg("could not write to file \"%s\": %m", tmppath))); -			} -		} -		close(srcfd); -	} - -	/* -	 * Append one line with the details of this timeline split. -	 * -	 * If we did have a parent file, insert an extra newline just in case the -	 * parent file failed to end with one. 
-	 */ -	XLogFileName(xlogfname, endTLI, endLogSegNo); - -	/* -	 * Write comment to history file to explain why and where timeline -	 * changed. Comment varies according to the recovery target used. -	 */ -	if (recoveryTarget == RECOVERY_TARGET_XID) -		snprintf(buffer, sizeof(buffer), -				 "%s%u\t%s\t%s transaction %u\n", -				 (srcfd < 0) ? "" : "\n", -				 parentTLI, -				 xlogfname, -				 recoveryStopAfter ? "after" : "before", -				 recoveryStopXid); -	else if (recoveryTarget == RECOVERY_TARGET_TIME) -		snprintf(buffer, sizeof(buffer), -				 "%s%u\t%s\t%s %s\n", -				 (srcfd < 0) ? "" : "\n", -				 parentTLI, -				 xlogfname, -				 recoveryStopAfter ? "after" : "before", -				 timestamptz_to_str(recoveryStopTime)); -	else if (recoveryTarget == RECOVERY_TARGET_NAME) -		snprintf(buffer, sizeof(buffer), -				 "%s%u\t%s\tat restore point \"%s\"\n", -				 (srcfd < 0) ? "" : "\n", -				 parentTLI, -				 xlogfname, -				 recoveryStopName); -	else -		snprintf(buffer, sizeof(buffer), -				 "%s%u\t%s\tno recovery target specified\n", -				 (srcfd < 0) ? "" : "\n", -				 parentTLI, -				 xlogfname); - -	nbytes = strlen(buffer); -	errno = 0; -	if ((int) write(fd, buffer, nbytes) != nbytes) -	{ -		int			save_errno = errno; - -		/* -		 * If we fail to make the file, delete it to release disk space -		 */ -		unlink(tmppath); -		/* if write didn't set errno, assume problem is no disk space */ -		errno = save_errno ? save_errno : ENOSPC; - -		ereport(ERROR, -				(errcode_for_file_access(), -				 errmsg("could not write to file \"%s\": %m", tmppath))); -	} - -	if (pg_fsync(fd) != 0) -		ereport(ERROR, -				(errcode_for_file_access(), -				 errmsg("could not fsync file \"%s\": %m", tmppath))); - -	if (close(fd)) -		ereport(ERROR, -				(errcode_for_file_access(), -				 errmsg("could not close file \"%s\": %m", tmppath))); - - -	/* -	 * Now move the completed history file into place with its final name. 
-	 */ -	TLHistoryFilePath(path, newTLI); - -	/* -	 * Prefer link() to rename() here just to be really sure that we don't -	 * overwrite an existing logfile.  However, there shouldn't be one, so -	 * rename() is an acceptable substitute except for the truly paranoid. -	 */ -#if HAVE_WORKING_LINK -	if (link(tmppath, path) < 0) -		ereport(ERROR, -				(errcode_for_file_access(), -				 errmsg("could not link file \"%s\" to \"%s\": %m", -						tmppath, path))); -	unlink(tmppath); -#else -	if (rename(tmppath, path) < 0) -		ereport(ERROR, -				(errcode_for_file_access(), -				 errmsg("could not rename file \"%s\" to \"%s\": %m", -						tmppath, path))); -#endif - -	/* The history file can be archived immediately. */ -	TLHistoryFileName(histfname, newTLI); -	XLogArchiveNotify(histfname); -} - -/*   * I/O routines for pg_control   *   * *ControlFile is a buffer in shared memory that holds an image of the @@ -6869,11 +5947,35 @@ StartupXLOG(void)  	 */  	if (InArchiveRecovery)  	{ +		char	reason[200]; +  		ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1;  		ereport(LOG,  				(errmsg("selected new timeline ID: %u", ThisTimeLineID))); + +		/* +		 * Write comment to history file to explain why and where timeline +		 * changed. Varies with the recovery target; no trailing newline. +		 */ +		if (recoveryTarget == RECOVERY_TARGET_XID) +			snprintf(reason, sizeof(reason), +					 "%s transaction %u", +					 recoveryStopAfter ? "after" : "before", +					 recoveryStopXid); +		else if (recoveryTarget == RECOVERY_TARGET_TIME) +			snprintf(reason, sizeof(reason), +					 "%s %s", +					 recoveryStopAfter ? 
"after" : "before", +					 timestamptz_to_str(recoveryStopTime)); +		else if (recoveryTarget == RECOVERY_TARGET_NAME) +			snprintf(reason, sizeof(reason), +					 "at restore point \"%s\"", +					 recoveryStopName); +		else +			snprintf(reason, sizeof(reason), "no recovery target specified"); +  		writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI, -							 curFileTLI, endLogSegNo); +							 curFileTLI, endLogSegNo, reason);  	}  	/* Save the selected TimeLineID in shared memory, too */ @@ -9919,6 +9021,36 @@ GetXLogWriteRecPtr(void)  }  /* + * Returns the redo pointer of the last restartpoint. This is the oldest + * point in WAL that we still need, if we have to restart recovery. Returns + * InvalidXLogRecPtr if we don't reliably know that point yet, that is, + * before we have started WAL redo. + * + * This function only works in the startup process, and only while we are + * in WAL redo. It's important to not return a value before redo has started, + * to avoid deleting WAL files that we might still need, but there's no + * fundamental reason why this couldn't return a valid value after redo has + * finished, or in other processes. This is enough for the current usage, + * however. 
+ */ +void +GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli) +{ +	if (InRedo) +	{ +		LWLockAcquire(ControlFileLock, LW_SHARED); +		*oldrecptr = ControlFile->checkPointCopy.redo; +		*oldtli = ControlFile->checkPointCopy.ThisTimeLineID; +		LWLockRelease(ControlFileLock); +	} +	else +	{ +		*oldrecptr = InvalidXLogRecPtr; +		*oldtli = 0; +	} +} + +/*   * read_backup_label: check to see if a backup_label file is present   *   * If we see a backup_label during recovery, we assume that we are recovering diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c new file mode 100644 index 00000000000..e72795783fc --- /dev/null +++ b/src/backend/access/transam/xlogarchive.c @@ -0,0 +1,572 @@ +/*------------------------------------------------------------------------- + * + * xlogarchive.c + *		Functions for archiving WAL files and restoring from the archive. + * + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/transam/xlogarchive.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <signal.h> +#include <unistd.h> + +#include "access/xlog_internal.h" +#include "miscadmin.h" +#include "postmaster/startup.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/pmsignal.h" + +/* + * Attempt to retrieve the specified file from off-line archival storage. + * If successful, fill "path" with its complete path (note that this will be + * a temp file name that doesn't follow the normal naming convention), and + * return TRUE. + * + * If not successful, fill "path" with the name of the normal on-line file + * (which may or may not actually exist, but we'll try to use it), and return + * FALSE. 
+ * + * For fixed-size files, the caller may pass the expected size as an + * additional crosscheck on successful recovery.  If the file size is not + * known, set expectedSize = 0. + */ +bool +RestoreArchivedFile(char *path, const char *xlogfname, +					const char *recovername, off_t expectedSize) +{ +	char		xlogpath[MAXPGPATH]; +	char		xlogRestoreCmd[MAXPGPATH]; +	char		lastRestartPointFname[MAXPGPATH]; +	char	   *dp; +	char	   *endp; +	const char *sp; +	int			rc; +	bool		signaled; +	struct stat stat_buf; +	XLogSegNo	restartSegNo; +	XLogRecPtr	restartRedoPtr; +	TimeLineID	restartTli; + +	/* In standby mode, restore_command might not be supplied */ +	if (recoveryRestoreCommand == NULL) +		goto not_available; + +	/* +	 * When doing archive recovery, we always prefer an archived log file even +	 * if a file of the same name exists in XLOGDIR.  The reason is that the +	 * file in XLOGDIR could be an old, un-filled or partly-filled version +	 * that was copied and restored as part of backing up $PGDATA. +	 * +	 * We could try to optimize this slightly by checking the local copy +	 * lastchange timestamp against the archived copy, but we have no API to +	 * do this, nor can we guarantee that the lastchange timestamp was +	 * preserved correctly when we copied to archive. Our aim is robustness, +	 * so we elect not to do this. +	 * +	 * If we cannot obtain the log file from the archive, however, we will try +	 * to use the XLOGDIR file if it exists.  This is so that we can make use +	 * of log segments that weren't yet transferred to the archive. +	 * +	 * Notice that we don't actually overwrite any files when we copy back +	 * from archive because the restore_command may inadvertently +	 * restore inappropriate xlogs, or they may be corrupt, so we may wish to +	 * fallback to the segments remaining in current XLOGDIR later. The +	 * copy-from-archive filename is always the same, ensuring that we don't +	 * run out of disk space on long recoveries. 
+	 */ +	snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername); + +	/* +	 * Make sure there is no existing file named recovername. +	 */ +	if (stat(xlogpath, &stat_buf) != 0) +	{ +		if (errno != ENOENT) +			ereport(FATAL, +					(errcode_for_file_access(), +					 errmsg("could not stat file \"%s\": %m", +							xlogpath))); +	} +	else +	{ +		if (unlink(xlogpath) != 0) +			ereport(FATAL, +					(errcode_for_file_access(), +					 errmsg("could not remove file \"%s\": %m", +							xlogpath))); +	} + +	/* +	 * Calculate the archive file cutoff point for use during log shipping +	 * replication. All files earlier than this point can be deleted from the +	 * archive, though there is no requirement to do so. +	 * +	 * We initialise this with the filename of an InvalidXLogRecPtr, which +	 * will prevent the deletion of any WAL files from the archive because of +	 * the alphabetic sorting property of WAL filenames. +	 * +	 * Once we have successfully located the redo pointer of the checkpoint +	 * from which we start recovery we never request a file prior to the redo +	 * pointer of the last restartpoint. When redo begins we know that we have +	 * successfully located it, so there is no need for additional status +	 * flags to signify the point when we can begin deleting WAL files from +	 * the archive. 
+	 */ +	GetOldestRestartPoint(&restartRedoPtr, &restartTli); +	if (!XLogRecPtrIsInvalid(restartRedoPtr)) +	{ +		XLByteToSeg(restartRedoPtr, restartSegNo); +		XLogFileName(lastRestartPointFname, restartTli, restartSegNo); +		/* we shouldn't need anything earlier than last restart point */ +		Assert(strcmp(lastRestartPointFname, xlogfname) <= 0); +	} +	else +		XLogFileName(lastRestartPointFname, 0, 0L); + +	/* +	 * construct the command to be executed +	 */ +	dp = xlogRestoreCmd; +	endp = xlogRestoreCmd + MAXPGPATH - 1; +	*endp = '\0'; + +	for (sp = recoveryRestoreCommand; *sp; sp++) +	{ +		if (*sp == '%') +		{ +			switch (sp[1]) +			{ +				case 'p': +					/* %p: relative path of target file */ +					sp++; +					StrNCpy(dp, xlogpath, endp - dp); +					make_native_path(dp); +					dp += strlen(dp); +					break; +				case 'f': +					/* %f: filename of desired file */ +					sp++; +					StrNCpy(dp, xlogfname, endp - dp); +					dp += strlen(dp); +					break; +				case 'r': +					/* %r: filename of last restartpoint */ +					sp++; +					StrNCpy(dp, lastRestartPointFname, endp - dp); +					dp += strlen(dp); +					break; +				case '%': +					/* convert %% to a single % */ +					sp++; +					if (dp < endp) +						*dp++ = *sp; +					break; +				default: +					/* otherwise treat the % as not special */ +					if (dp < endp) +						*dp++ = *sp; +					break; +			} +		} +		else +		{ +			if (dp < endp) +				*dp++ = *sp; +		} +	} +	*dp = '\0'; + +	ereport(DEBUG3, +			(errmsg_internal("executing restore command \"%s\"", +							 xlogRestoreCmd))); + +	/* +	 * Check signals before restore command and reset afterwards. +	 */ +	PreRestoreCommand(); + +	/* +	 * Copy xlog from archival storage to XLOGDIR +	 */ +	rc = system(xlogRestoreCmd); + +	PostRestoreCommand(); + +	if (rc == 0) +	{ +		/* +		 * command apparently succeeded, but let's make sure the file is +		 * really there now and has the correct size. 
+		 */ +		if (stat(xlogpath, &stat_buf) == 0) +		{ +			if (expectedSize > 0 && stat_buf.st_size != expectedSize) +			{ +				int			elevel; + +				/* +				 * If we find a partial file in standby mode, we assume it's +				 * because it's just being copied to the archive, and keep +				 * trying. +				 * +				 * Otherwise treat a wrong-sized file as FATAL to ensure the +				 * DBA would notice it, but is that too strong? We could try +				 * to plow ahead with a local copy of the file ... but the +				 * problem is that there probably isn't one, and we'd +				 * incorrectly conclude we've reached the end of WAL and we're +				 * done recovering ... +				 */ +				if (StandbyMode && stat_buf.st_size < expectedSize) +					elevel = DEBUG1; +				else +					elevel = FATAL; +				ereport(elevel, +						(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu", +								xlogfname, +								(unsigned long) stat_buf.st_size, +								(unsigned long) expectedSize))); +				return false; +			} +			else +			{ +				ereport(LOG, +						(errmsg("restored log file \"%s\" from archive", +								xlogfname))); +				strcpy(path, xlogpath); +				return true; +			} +		} +		else +		{ +			/* stat failed */ +			if (errno != ENOENT) +				ereport(FATAL, +						(errcode_for_file_access(), +						 errmsg("could not stat file \"%s\": %m", +								xlogpath))); +		} +	} + +	/* +	 * Remember, we rollforward UNTIL the restore fails so failure here is +	 * just part of the process... that makes it difficult to determine +	 * whether the restore failed because there isn't an archive to restore, +	 * or because the administrator has specified the restore program +	 * incorrectly.  We have to assume the former. +	 * +	 * However, if the failure was due to any sort of signal, it's best to +	 * punt and abort recovery.  (If we "return false" here, upper levels will +	 * assume that recovery is complete and start up the database!) 
It's +	 * essential to abort on child SIGINT and SIGQUIT, because per spec +	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of +	 * those it's a good bet we should have gotten it too. +	 * +	 * On SIGTERM, assume we have received a fast shutdown request, and exit +	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the +	 * child process. If we receive it first, the signal handler will call +	 * proc_exit, otherwise we do it here. If we or the child process received +	 * SIGTERM for any other reason than a fast shutdown request, postmaster +	 * will perform an immediate shutdown when it sees us exiting +	 * unexpectedly. +	 * +	 * Per the Single Unix Spec, shells report exit status > 128 when a called +	 * command died on a signal.  Also, 126 and 127 are used to report +	 * problems such as an unfindable command; treat those as fatal errors +	 * too. +	 */ +	if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) +		proc_exit(1); + +	signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + +	ereport(signaled ? FATAL : DEBUG2, +		(errmsg("could not restore file \"%s\" from archive: return code %d", +				xlogfname, rc))); + +not_available: + +	/* +	 * if an archived file is not available, there might still be a version of +	 * this file in XLOGDIR, so return that as the filename to open. +	 * +	 * In many recovery scenarios we expect this to fail also, but if so that +	 * just means we've reached the end of WAL. +	 */ +	snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname); +	return false; +} + +/* + * Attempt to execute an external shell command during recovery. + * + * 'command' is the shell command to be executed, 'commandName' is a + * human-readable name describing the command emitted in the logs. If + * 'failOnSignal' is true and the command is killed by a signal, a FATAL + * error is thrown. Otherwise a WARNING is emitted. + * + * This is currently used for recovery_end_command and archive_cleanup_command. 
+ */ +void +ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal) +{ +	char		xlogRecoveryCmd[MAXPGPATH]; +	char		lastRestartPointFname[MAXPGPATH]; +	char	   *dp; +	char	   *endp; +	const char *sp; +	int			rc; +	bool		signaled; +	XLogSegNo	restartSegNo; +	XLogRecPtr	restartRedoPtr; +	TimeLineID	restartTli; + +	Assert(command && commandName); + +	/* +	 * Calculate the archive file cutoff point for use during log shipping +	 * replication. All files earlier than this point can be deleted from the +	 * archive, though there is no requirement to do so. No lock is released +	 * here: GetOldestRestartPoint() takes and drops ControlFileLock itself. +	 */ +	GetOldestRestartPoint(&restartRedoPtr, &restartTli); +	XLByteToSeg(restartRedoPtr, restartSegNo); +	XLogFileName(lastRestartPointFname, restartTli, restartSegNo); + +	/* +	 * construct the command to be executed +	 */ +	dp = xlogRecoveryCmd; +	endp = xlogRecoveryCmd + MAXPGPATH - 1; +	*endp = '\0'; + +	for (sp = command; *sp; sp++) +	{ +		if (*sp == '%') +		{ +			switch (sp[1]) +			{ +				case 'r': +					/* %r: filename of last restartpoint */ +					sp++; +					StrNCpy(dp, lastRestartPointFname, endp - dp); +					dp += strlen(dp); +					break; +				case '%': +					/* convert %% to a single % */ +					sp++; +					if (dp < endp) +						*dp++ = *sp; +					break; +				default: +					/* otherwise treat the % as not special */ +					if (dp < endp) +						*dp++ = *sp; +					break; +			} +		} +		else +		{ +			if (dp < endp) +				*dp++ = *sp; +		} +	} +	*dp = '\0'; + +	ereport(DEBUG3, +			(errmsg_internal("executing %s \"%s\"", commandName, command))); + +	/* +	 * execute the constructed command +	 */ +	rc = system(xlogRecoveryCmd); +	if (rc != 0) +	{ +		/* +		 * If the failure was due to any sort of signal, it's best to punt and +		 * abort recovery. See also detailed comments on signals in +		 * RestoreArchivedFile(). +		 */ +		signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + +		ereport((signaled && failOnSignal) ? 
FATAL : WARNING, +		/*------ +		   translator: First %s represents a recovery.conf parameter name like +		  "recovery_end_command", and the 2nd is the value of that parameter. */ +				(errmsg("%s \"%s\": return code %d", commandName, +						command, rc))); +	} +} + + +/* + * XLogArchiveNotify + * + * Create an archive notification file + * + * The name of the notification file is the message that will be picked up + * by the archiver, e.g. we write 0000000100000001000000C6.ready + * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6, + * then when complete, rename it to 0000000100000001000000C6.done + */ +void +XLogArchiveNotify(const char *xlog) +{ +	char		archiveStatusPath[MAXPGPATH]; +	FILE	   *fd; + +	/* insert an otherwise empty file called <XLOG>.ready */ +	StatusFilePath(archiveStatusPath, xlog, ".ready"); +	fd = AllocateFile(archiveStatusPath, "w"); +	if (fd == NULL) +	{ +		ereport(LOG, +				(errcode_for_file_access(), +				 errmsg("could not create archive status file \"%s\": %m", +						archiveStatusPath))); +		return; +	} +	if (FreeFile(fd)) +	{ +		ereport(LOG, +				(errcode_for_file_access(), +				 errmsg("could not write archive status file \"%s\": %m", +						archiveStatusPath))); +		return; +	} + +	/* Notify archiver that it's got something to do */ +	if (IsUnderPostmaster) +		SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER); +} + +/* + * Convenience routine to notify using segment number representation of filename + */ +void +XLogArchiveNotifySeg(XLogSegNo segno) +{ +	char		xlog[MAXFNAMELEN]; + +	XLogFileName(xlog, ThisTimeLineID, segno); +	XLogArchiveNotify(xlog); +} + +/* + * XLogArchiveCheckDone + * + * This is called when we are ready to delete or recycle an old XLOG segment + * file or backup history file.  If it is okay to delete it then return true. + * If it is not time to delete it, make sure a .ready file exists, and return + * false. 
+ * + * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists, + * then return false; else create <XLOG>.ready and return false. + * + * The reason we do things this way is so that if the original attempt to + * create <XLOG>.ready fails, we'll retry during subsequent checkpoints. + */ +bool +XLogArchiveCheckDone(const char *xlog) +{ +	char		archiveStatusPath[MAXPGPATH]; +	struct stat stat_buf; + +	/* Always deletable if archiving is off */ +	if (!XLogArchivingActive()) +		return true; + +	/* First check for .done --- this means archiver is done with it */ +	StatusFilePath(archiveStatusPath, xlog, ".done"); +	if (stat(archiveStatusPath, &stat_buf) == 0) +		return true; + +	/* check for .ready --- this means archiver is still busy with it */ +	StatusFilePath(archiveStatusPath, xlog, ".ready"); +	if (stat(archiveStatusPath, &stat_buf) == 0) +		return false; + +	/* Race condition --- maybe archiver just finished, so recheck */ +	StatusFilePath(archiveStatusPath, xlog, ".done"); +	if (stat(archiveStatusPath, &stat_buf) == 0) +		return true; + +	/* Retry creation of the .ready file */ +	XLogArchiveNotify(xlog); +	return false; +} + +/* + * XLogArchiveIsBusy + * + * Check to see if an XLOG segment file is still unarchived. + * This is almost but not quite the inverse of XLogArchiveCheckDone: in + * the first place we aren't chartered to recreate the .ready file, and + * in the second place we should consider that if the file is already gone + * then it's not busy.  (This check is needed to handle the race condition + * that a checkpoint already deleted the no-longer-needed file.) 
+ */ +bool +XLogArchiveIsBusy(const char *xlog) +{ +	char		archiveStatusPath[MAXPGPATH]; +	struct stat stat_buf; + +	/* First check for .done --- this means archiver is done with it */ +	StatusFilePath(archiveStatusPath, xlog, ".done"); +	if (stat(archiveStatusPath, &stat_buf) == 0) +		return false; + +	/* check for .ready --- this means archiver is still busy with it */ +	StatusFilePath(archiveStatusPath, xlog, ".ready"); +	if (stat(archiveStatusPath, &stat_buf) == 0) +		return true; + +	/* Race condition --- maybe archiver just finished, so recheck */ +	StatusFilePath(archiveStatusPath, xlog, ".done"); +	if (stat(archiveStatusPath, &stat_buf) == 0) +		return false; + +	/* +	 * Check to see if the WAL file has been removed by checkpoint, which +	 * implies it has already been archived, and explains why we can't see a +	 * status file for it. +	 */ +	snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog); +	if (stat(archiveStatusPath, &stat_buf) != 0 && +		errno == ENOENT) +		return false; + +	return true; +} + +/* + * XLogArchiveCleanup + * + * Cleanup archive notification file(s) for a particular xlog segment + */ +void +XLogArchiveCleanup(const char *xlog) +{ +	char		archiveStatusPath[MAXPGPATH]; + +	/* Remove the .done file */ +	StatusFilePath(archiveStatusPath, xlog, ".done"); +	unlink(archiveStatusPath); +	/* should we complain about failure? */ + +	/* Remove the .ready file if present --- normally it shouldn't be */ +	StatusFilePath(archiveStatusPath, xlog, ".ready"); +	unlink(archiveStatusPath); +	/* should we complain about failure? */ +} diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h new file mode 100644 index 00000000000..f2a7658bc45 --- /dev/null +++ b/src/include/access/timeline.h @@ -0,0 +1,23 @@ +/* + * timeline.h + * + * Functions for reading and writing timeline history files. 
+ * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/timeline.h + */ +#ifndef TIMELINE_H +#define TIMELINE_H + +#include "access/xlogdefs.h" +#include "nodes/pg_list.h" + +extern List *readTimeLineHistory(TimeLineID targetTLI); +extern bool existsTimeLineHistory(TimeLineID probeTLI); +extern TimeLineID findNewestTimeLine(TimeLineID startTLI); +extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, +					 TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason); + +#endif   /* TIMELINE_H */ diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index b5bfb7b4072..2c66b2feb8a 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -229,6 +229,29 @@ extern const RmgrData RmgrTable[];  extern pg_time_t GetLastSegSwitchTime(void);  extern XLogRecPtr RequestXLogSwitch(void); +extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli); + +/* + * Exported for the functions in timeline.c and xlogarchive.c.  Only valid + * in the startup process. + */ +extern bool InArchiveRecovery; +extern bool StandbyMode; +extern char *recoveryRestoreCommand; + +/* + * Prototypes for functions in xlogarchive.c + */ +extern bool RestoreArchivedFile(char *path, const char *xlogfname, +					const char *recovername, off_t expectedSize); +extern void ExecuteRecoveryCommand(char *command, char *commandName, +					   bool failOnSignal); +extern void XLogArchiveNotify(const char *xlog); +extern void XLogArchiveNotifySeg(XLogSegNo segno); +extern bool XLogArchiveCheckDone(const char *xlog); +extern bool XLogArchiveIsBusy(const char *xlog); +extern void XLogArchiveCleanup(const char *xlog); +  /*   * These aren't in xlog.h because I'd rather not include fmgr.h there.   */ | 
