diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-08-16 13:12:10 -0400 | 
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-08-16 13:12:10 -0400 | 
| commit | 93519b0c620123301142ac49b79796be20c2dce8 (patch) | |
| tree | c4a4b6cd8c042f1da988a906268ef15dc141760b | |
| parent | f239ec57277b3fffe1c5bd2694a9d0d726d3a259 (diff) | |
Fix race condition in relcache init file invalidation.
The previous code tried to synchronize by unlinking the init file twice,
but that doesn't actually work: it leaves a window wherein a third process
could read the already-stale init file but miss the SI messages that would
tell it the data is stale.  The result would be bizarre failures in catalog
accesses, typically "could not read block 0 in file ..." later during
startup.
Instead, hold RelCacheInitLock across both the unlink and the sending of
the SI messages.  This is more straightforward, and might even be a bit
faster since only one unlink call is needed.
This has been wrong since it was put in (in 2002!), so back-patch to all
supported releases.
| -rw-r--r-- | src/backend/access/transam/twophase.c | 4 | ||||
| -rw-r--r-- | src/backend/utils/cache/inval.c | 33 | ||||
| -rw-r--r-- | src/backend/utils/cache/relcache.c | 66 | ||||
| -rw-r--r-- | src/include/utils/relcache.h | 3 | 
4 files changed, 57 insertions, 49 deletions
| diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index d6dca97bce8..4691c7e14ab 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1338,10 +1338,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)  	 * after we send the SI messages. See AtEOXact_Inval()  	 */  	if (hdr->initfileinval) -		RelationCacheInitFileInvalidate(true); +		RelationCacheInitFilePreInvalidate();  	SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);  	if (hdr->initfileinval) -		RelationCacheInitFileInvalidate(false); +		RelationCacheInitFilePostInvalidate();  	/* And now do the callbacks */  	if (isCommit) diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 7a67f4a85e8..d17ade97f81 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -849,24 +849,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,  	return numSharedInvalidMessagesArray;  } -#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \ -{ \ -	DatabasePath = GetDatabasePath(dbo, tbo); \ -	elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \ -	RelationCacheInitFileInvalidate(tf); \ -	pfree(DatabasePath); \ -} -  /*   * ProcessCommittedInvalidationMessages is executed by xact_redo_commit()   * to process invalidation messages added to commit records.   *   * Relcache init file invalidation requires processing both   * before and after we send the SI messages. See AtEOXact_Inval() - * - * We deliberately avoid SetDatabasePath() since it is intended to be used - * only once by normal backends, so we set DatabasePath directly then - * pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro.   */  void  ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, @@ -880,12 +868,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,  		 (RelcacheInitFileInval ? " and relcache file invalidation" : ""));  	if (RelcacheInitFileInval) -		RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true); +	{ +		/* +		 * RelationCacheInitFilePreInvalidate requires DatabasePath to be set, +		 * but we should not use SetDatabasePath during recovery, since it is +		 * intended to be used only once by normal backends.  Hence, a quick +		 * hack: set DatabasePath directly then unset after use. +		 */ +		DatabasePath = GetDatabasePath(dbid, tsid); +		elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"", +			 DatabasePath); +		RelationCacheInitFilePreInvalidate(); +		pfree(DatabasePath); +		DatabasePath = NULL; +	}  	SendSharedInvalidMessages(msgs, nmsgs);  	if (RelcacheInitFileInval) -		RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false); +		RelationCacheInitFilePostInvalidate();  }  /* @@ -926,7 +927,7 @@ AtEOXact_Inval(bool isCommit)  		 * unless we committed.  		 */  		if (transInvalInfo->RelcacheInitFileInval) -			RelationCacheInitFileInvalidate(true); +			RelationCacheInitFilePreInvalidate();  		AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,  								   &transInvalInfo->CurrentCmdInvalidMsgs); @@ -935,7 +936,7 @@ AtEOXact_Inval(bool isCommit)  										 SendSharedInvalidMessages);  		if (transInvalInfo->RelcacheInitFileInval) -			RelationCacheInitFileInvalidate(false); +			RelationCacheInitFilePostInvalidate();  	}  	else if (transInvalInfo != NULL)  	{ diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 81cea8b6040..eff02adf869 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -4402,8 +4402,8 @@ write_relcache_init_file(bool shared)  	 * updated by SI message processing, but we can't be sure whether what we  	 * wrote out was up-to-date.)  	 * -	 * This mustn't run concurrently with RelationCacheInitFileInvalidate, so -	 * grab a serialization lock for the duration. +	 * This mustn't run concurrently with the code that unlinks an init file +	 * and sends SI messages, so grab a serialization lock for the duration.  	 */  	LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); @@ -4467,19 +4467,22 @@ RelationIdIsInInitFile(Oid relationId)   * changed one or more of the relation cache entries that are kept in the   * local init file.   * - * We actually need to remove the init file twice: once just before sending - * the SI messages that include relcache inval for such relations, and once - * just after sending them.  The unlink before ensures that a backend that's - * currently starting cannot read the now-obsolete init file and then miss - * the SI messages that will force it to update its relcache entries.  (This - * works because the backend startup sequence gets into the PGPROC array before - * trying to load the init file.)  The unlink after is to synchronize with a - * backend that may currently be trying to write an init file based on data - * that we've just rendered invalid.  Such a backend will see the SI messages, - * but we can't leave the init file sitting around to fool later backends. + * To be safe against concurrent inspection or rewriting of the init file, + * we must take RelCacheInitLock, then remove the old init file, then send + * the SI messages that include relcache inval for such relations, and then + * release RelCacheInitLock.  This serializes the whole affair against + * write_relcache_init_file, so that we can be sure that any other process + * that's concurrently trying to create a new init file won't move an + * already-stale version into place after we unlink.  Also, because we unlink + * before sending the SI messages, a backend that's currently starting cannot + * read the now-obsolete init file and then miss the SI messages that will + * force it to update its relcache entries.  (This works because the backend + * startup sequence gets into the sinval array before trying to load the init + * file.)   * - * Ignore any failure to unlink the file, since it might not be there if - * no backend has been started since the last removal. + * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate, + * then release the lock in RelationCacheInitFilePostInvalidate.  Caller must + * send any pending SI messages between those calls.   *   * Notice this deals only with the local init file, not the shared init file.   * The reason is that there can never be a "significant" change to the @@ -4489,34 +4492,37 @@ RelationIdIsInInitFile(Oid relationId)   * be invalid enough to make it necessary to remove it.   */  void -RelationCacheInitFileInvalidate(bool beforeSend) +RelationCacheInitFilePreInvalidate(void)  {  	char		initfilename[MAXPGPATH];  	snprintf(initfilename, sizeof(initfilename), "%s/%s",  			 DatabasePath, RELCACHE_INIT_FILENAME); -	if (beforeSend) -	{ -		/* no interlock needed here */ -		unlink(initfilename); -	} -	else +	LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); + +	if (unlink(initfilename) < 0)  	{  		/* -		 * We need to interlock this against write_relcache_init_file, to -		 * guard against possibility that someone renames a new-but- -		 * already-obsolete init file into place just after we unlink. With -		 * the interlock, it's certain that write_relcache_init_file will -		 * notice our SI inval message before renaming into place, or else -		 * that we will execute second and successfully unlink the file. +		 * The file might not be there if no backend has been started since +		 * the last removal.  But complain about failures other than ENOENT. +		 * Fortunately, it's not too late to abort the transaction if we +		 * can't get rid of the would-be-obsolete init file.  		 */ -		LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); -		unlink(initfilename); -		LWLockRelease(RelCacheInitLock); +		if (errno != ENOENT) +			ereport(ERROR, +					(errcode_for_file_access(), +					 errmsg("could not remove cache file \"%s\": %m", +							initfilename)));  	}  } +void +RelationCacheInitFilePostInvalidate(void) +{ +	LWLockRelease(RelCacheInitLock); +} +  /*   * Remove the init files during postmaster startup.   * diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 4db4ba5db28..30c7a2acea3 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -96,7 +96,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,   * Routines to help manage rebuilding of relcache init files   */  extern bool RelationIdIsInInitFile(Oid relationId); -extern void RelationCacheInitFileInvalidate(bool beforeSend); +extern void RelationCacheInitFilePreInvalidate(void); +extern void RelationCacheInitFilePostInvalidate(void);  extern void RelationCacheInitFileRemove(void);  /* should be used only by relcache.c and catcache.c */ | 
