6 files changed, 256 insertions, 10 deletions
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 4fbc6d0720d..809c3a0972c 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -584,7 +584,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
 			int			i;
 
 			if (msg->rc.relId == InvalidOid)
-				RelationCacheInvalidate();
+				RelationCacheInvalidate(false);
 			else
 				RelationCacheInvalidateEntry(msg->rc.relId);
 
@@ -642,11 +642,17 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
 void
 InvalidateSystemCaches(void)
 {
+	InvalidateSystemCachesExtended(false);
+}
+
+void
+InvalidateSystemCachesExtended(bool debug_discard)
+{
 	int			i;
 
 	InvalidateCatalogSnapshot();
 	ResetCatalogCaches();
-	RelationCacheInvalidate();	/* gets smgr and relmap too */
+	RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */
 
 	for (i = 0; i < syscache_callback_count; i++)
 	{
@@ -717,7 +723,7 @@ AcceptInvalidationMessages(void)
 		if (recursion_depth < 3)
 		{
 			recursion_depth++;
-			InvalidateSystemCaches();
+			InvalidateSystemCachesExtended(true);
 			recursion_depth--;
 		}
 	}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 32e75560ead..7976a0d66d4 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -151,6 +151,24 @@ bool		criticalSharedRelcachesBuilt = false;
 static long relcacheInvalsReceived = 0L;
 
 /*
+ * in_progress_list is a stack of ongoing RelationBuildDesc() calls.  CREATE
+ * INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
+ * It critically relies on each backend absorbing those changes no later than
+ * next transaction start.  Hence, RelationBuildDesc() loops until it finishes
+ * without accepting a relevant invalidation.  (Most invalidation consumers
+ * don't do this.)
+ */
+typedef struct inprogressent
+{
+	Oid			reloid;			/* OID of relation being built */
+	bool		invalidated;	/* whether an invalidation arrived for it */
+} InProgressEnt;
+
+static InProgressEnt *in_progress_list;
+static int	in_progress_list_len;
+static int	in_progress_list_maxlen;
+
+/*
  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
  * cleanup work.  This list intentionally has limited size; if it overflows,
  * we fall back to scanning the whole hashtable.  There is no value in a very
@@ -1043,6 +1061,7 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
 static Relation
 RelationBuildDesc(Oid targetRelId, bool insertIt)
 {
+	int			in_progress_offset;
 	Relation	relation;
 	Oid			relid;
 	HeapTuple	pg_class_tuple;
@@ -1070,6 +1089,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
 	oldcxt = MemoryContextSwitchTo(tmpcxt);
 #endif
 
+	/* Register to catch invalidation messages */
+	if (in_progress_list_len >= in_progress_list_maxlen)
+	{
+		int			allocsize;
+
+		allocsize = in_progress_list_maxlen * 2;
+		in_progress_list = repalloc(in_progress_list,
+									allocsize * sizeof(*in_progress_list));
+		in_progress_list_maxlen = allocsize;
+	}
+	in_progress_offset = in_progress_list_len++;
+	in_progress_list[in_progress_offset].reloid = targetRelId;
+retry:
+	in_progress_list[in_progress_offset].invalidated = false;
+
 	/*
 	 * find the tuple in pg_class corresponding to the given relation id
 	 */
@@ -1085,6 +1119,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
 		MemoryContextSwitchTo(oldcxt);
 		MemoryContextDelete(tmpcxt);
 #endif
+		Assert(in_progress_offset + 1 == in_progress_list_len);
+		in_progress_list_len--;
 		return NULL;
 	}
 
@@ -1245,6 +1281,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
 	heap_freetuple(pg_class_tuple);
 
 	/*
+	 * If an invalidation arrived mid-build, start over.  Between here and the
+	 * end of this function, don't add code that does or reasonably could read
+	 * system catalogs.  That range must be free from invalidation processing
+	 * for the !insertIt case.  For the insertIt case, RelationCacheInsert()
+	 * will enroll this relation in ordinary relcache invalidation processing.
+	 */
+	if (in_progress_list[in_progress_offset].invalidated)
+	{
+		RelationDestroyRelation(relation, false);
+		goto retry;
+	}
+	Assert(in_progress_offset + 1 == in_progress_list_len);
+	in_progress_list_len--;
+
+	/*
 	 * Insert newly created relation into relcache hash table, if requested.
 	 *
 	 * There is one scenario in which we might find a hashtable entry already
@@ -2586,6 +2637,14 @@ RelationClearRelation(Relation relation, bool rebuild)
 
 		/* Build temporary entry, but don't link it into hashtable */
 		newrel = RelationBuildDesc(save_relid, false);
+
+		/*
+		 * Between here and the end of the swap, don't add code that does or
+		 * reasonably could read system catalogs.  That range must be free
+		 * from invalidation processing.  See RelationBuildDesc() manipulation
+		 * of in_progress_list.
+		 */
+
 		if (newrel == NULL)
 		{
 			/*
@@ -2816,6 +2875,14 @@ RelationCacheInvalidateEntry(Oid relationId)
 		relcacheInvalsReceived++;
 		RelationFlushRelation(relation);
 	}
+	else
+	{
+		int			i;
+
+		for (i = 0; i < in_progress_list_len; i++)
+			if (in_progress_list[i].reloid == relationId)
+				in_progress_list[i].invalidated = true;
+	}
 }
 
 /*
@@ -2824,11 +2891,11 @@ RelationCacheInvalidateEntry(Oid relationId)
  *	 and rebuild those with positive reference counts.  Also reset the smgr
  *	 relation cache and re-read relation mapping data.
  *
- *	 This is currently used only to recover from SI message buffer overflow,
- *	 so we do not touch relations having new-in-transaction relfilenodes; they
- *	 cannot be targets of cross-backend SI updates (and our own updates now go
- *	 through a separate linked list that isn't limited by the SI message
- *	 buffer size).
+ *	 Apart from debug_discard_caches, this is currently used only to recover
+ *	 from SI message buffer overflow, so we do not touch relations having
+ *	 new-in-transaction relfilenodes; they cannot be targets of cross-backend
+ *	 SI updates (and our own updates now go through a separate linked list
+ *	 that isn't limited by the SI message buffer size).
  *
  *	 We do this in two phases: the first pass deletes deletable items, and
  *	 the second one rebuilds the rebuildable items.  This is essential for
@@ -2846,9 +2913,14 @@ RelationCacheInvalidateEntry(Oid relationId)
  *	 second pass processes nailed-in-cache items before other nondeletable
  *	 items.  This should ensure that system catalogs are up to date before
  *	 we attempt to use them to reload information about other open relations.
+ *
+ *	 After those two phases of work having immediate effects, we normally
+ *	 signal any RelationBuildDesc() on the stack to start over.  However, we
+ *	 don't do this if called as part of debug_discard_caches.  Otherwise,
+ *	 RelationBuildDesc() would become an infinite loop.
  */
 void
-RelationCacheInvalidate(void)
+RelationCacheInvalidate(bool debug_discard)
 {
 	HASH_SEQ_STATUS status;
 	RelIdCacheEnt *idhentry;
@@ -2856,6 +2928,7 @@ RelationCacheInvalidate(void)
 	List	   *rebuildFirstList = NIL;
 	List	   *rebuildList = NIL;
 	ListCell   *l;
+	int			i;
 
 	/*
 	 * Reload relation mapping data before starting to reconstruct cache.
@@ -2942,6 +3015,11 @@ RelationCacheInvalidate(void)
 		RelationClearRelation(relation, true);
 	}
 	list_free(rebuildList);
+
+	if (!debug_discard)
+		/* Any RelationBuildDesc() on the stack must start over. */
+		for (i = 0; i < in_progress_list_len; i++)
+			in_progress_list[i].invalidated = true;
 }
 
 /*
@@ -3093,6 +3171,13 @@ AtEOXact_RelationCache(bool isCommit)
 	int			i;
 
 	/*
+	 * Forget in_progress_list.  This is relevant when we're aborting due to
+	 * an error during RelationBuildDesc().
+	 */
+	Assert(in_progress_list_len == 0 || !isCommit);
+	in_progress_list_len = 0;
+
+	/*
 	 * Unless the eoxact_list[] overflowed, we only need to examine the rels
 	 * listed in it.  Otherwise fall back on a hash_seq_search scan.
 	 *
@@ -3239,6 +3324,14 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
 	int			i;
 
 	/*
+	 * Forget in_progress_list.  This is relevant when we're aborting due to
+	 * an error during RelationBuildDesc().  We don't commit subtransactions
+	 * during RelationBuildDesc().
+	 */
+	Assert(in_progress_list_len == 0 || !isCommit);
+	in_progress_list_len = 0;
+
+	/*
 	 * Unless the eoxact_list[] overflowed, we only need to examine the rels
 	 * listed in it.  Otherwise fall back on a hash_seq_search scan.  Same
 	 * logic as in AtEOXact_RelationCache.
@@ -3786,6 +3879,7 @@ void
 RelationCacheInitialize(void)
 {
 	HASHCTL		ctl;
+	int			allocsize;
 
 	/*
 	 * make sure cache memory context exists
@@ -3803,6 +3897,15 @@ RelationCacheInitialize(void)
 								  &ctl, HASH_ELEM | HASH_BLOBS);
 
 	/*
+	 * reserve enough in_progress_list slots for many cases
+	 */
+	allocsize = 4;
+	in_progress_list =
+		MemoryContextAlloc(CacheMemoryContext,
+						   allocsize * sizeof(*in_progress_list));
+	in_progress_list_maxlen = allocsize;
+
+	/*
 	 * relation mapper needs to be initialized too
 	 */
 	RelationMapInitialize();
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index bc5081cf721..4c6b86c9610 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -61,4 +61,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
 extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
 
 extern void InvalidateSystemCaches(void);
+extern void InvalidateSystemCachesExtended(bool debug_discard);
 #endif							/* INVAL_H */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index a432a407b08..ff478c3ba7f 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -120,7 +120,7 @@ extern void RelationForgetRelation(Oid rid);
 
 extern void RelationCacheInvalidateEntry(Oid relationId);
 
-extern void RelationCacheInvalidate(void);
+extern void RelationCacheInvalidate(bool debug_discard);
 
 extern void RelationCloseSmgrByOid(Oid relationId);
 
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 52f552be85b..cbcb74866b9 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -1638,6 +1638,141 @@ sub interactive_psql
 	return $harness;
 }
 
+# Common sub of pgbench-invoking interfaces.  Makes any requested script files
+# and returns pgbench command-line options causing use of those files.
+sub _pgbench_make_files
+{
+	my ($self, $files) = @_;
+	my @file_opts;
+
+	if (defined $files)
+	{
+
+		# note: files are ordered for determinism
+		for my $fn (sort keys %$files)
+		{
+			my $filename = $self->basedir . '/' . $fn;
+			push @file_opts, '-f', $filename;
+
+			# strip the optional "@weight" suffix to get the on-disk file name
+			$filename =~ s/\@\d+$//;
+
+			#push @filenames, $filename;
+			# filenames are expected to be unique on a test
+			if (-e $filename)
+			{
+				ok(0, "$filename must not already exist");
+				unlink $filename or die "cannot unlink $filename: $!";
+			}
+			TestLib::append_to_file($filename, $$files{$fn});
+		}
+	}
+
+	return @file_opts;
+}
+
+=pod
+
+=item $node->pgbench($opts, $stat, $out, $err, $name, $files, @args)
+
+Invoke B<pgbench>, with parameters and files.
+
+=over
+
+=item $opts
+
+Options as a string to be split on whitespace.
+
+=item $stat
+
+Expected exit status.
+
+=item $out
+
+Reference to a regexp list that must match stdout.
+
+=item $err
+
+Reference to a regexp list that must match stderr.
+
+=item $name
+
+Name of test for error messages.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=item @args
+
+Further raw options or arguments.
+
+=back
+
+=cut
+
+sub pgbench
+{
+	local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+	my ($self, $opts, $stat, $out, $err, $name, $files, @args) = @_;
+	my @cmd = (
+		'pgbench',
+		split(/\s+/, $opts),
+		$self->_pgbench_make_files($files), @args);
+
+	$self->command_checks_all(\@cmd, $stat, $out, $err, $name);
+}
+
+=pod
+
+=item $node->background_pgbench($opts, $files, \$stdout, $timer) => harness
+
+Invoke B<pgbench> and return an IPC::Run harness object.  The process's stdin
+is empty, and its stdout and stderr go to the $stdout scalar reference.  This
+allows the caller to act on other parts of the system while B<pgbench> is
+running.  Errors from B<pgbench> are the caller's problem.
+
+The specified timer object is attached to the harness, as well.  It's the
+caller's responsibility to select the timeout length, and to restart the timer
+after each command if the timeout is per-command.
+
+Be sure to "finish" the harness when done with it.
+
+=over
+
+=item $opts
+
+Options as a string to be split on whitespace.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=back
+
+=cut
+
+sub background_pgbench
+{
+	my ($self, $opts, $files, $stdout, $timer) = @_;
+
+	my @cmd =
+	  ('pgbench', split(/\s+/, $opts), $self->_pgbench_make_files($files));
+
+	local $ENV{PGHOST} = $self->host;
+	local $ENV{PGPORT} = $self->port;
+
+	my $stdin = "";
+	# IPC::Run would otherwise append to existing contents:
+	$$stdout = "" if ref($stdout);
+
+	my $harness = IPC::Run::start \@cmd, '<', \$stdin, '>', $stdout, '2>&1',
+	  $timer;
+
+	return $harness;
+}
+
 =pod
 
 =item $node->poll_query_until($dbname, $query [, $expected ])
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 918d14bcae2..50fdb17fa3f 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1044,6 +1044,7 @@ ImportForeignSchemaStmt
 ImportForeignSchemaType
 ImportForeignSchema_function
 ImportQual
+InProgressEnt
 IncludeWal
 InclusionOpaque
 IncrementVarSublevelsUp_context