Fix assorted bugs in CREATE INDEX CONCURRENTLY.

This patch changes CREATE INDEX CONCURRENTLY so that the pg_index flag changes it makes without exclusive lock on the index are made via heap_inplace_update() rather than a normal transactional update. The latter is not very safe because moving the pg_index tuple could result in concurrent SnapshotNow scans finding it twice or not at all, thus possibly resulting in index corruption. In addition, fix various places in the code that ought to check to make sure that the indexes they are manipulating are valid and/or ready as appropriate. These represent bugs that have existed since 8.2, since a failed CREATE INDEX CONCURRENTLY could leave a corrupt or invalid index behind, and we ought not try to do anything that might fail with such an index. Also fix RelationReloadIndexInfo to ensure it copies all the pg_index columns that are allowed to change after initial creation. Previously we could have been left with stale values of some fields in an index relcache entry. It's not clear whether this actually had any user-visible consequences, but it's at least a bug waiting to happen. This is a subset of a patch already applied in 9.2 and HEAD. Back-patch into all earlier supported branches. Tom Lane and Andres Freund
author: Tom Lane <tgl@sss.pgh.pa.us> 2012-11-29 14:50:39 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2012-11-29 14:52:07 -0500
commit: 1dbd02dc37efb74b770aa834b2cb65dae2446640 (patch)
tree: b2a6dd6301b09fb40e99f978017e3337c85e3595 /src/backend/catalog/index.c
parent: 3dfdf28152eb2df7cb4d4e43f461903fcc09e1d2 (diff)
1 files changed, 117 insertions, 24 deletions
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 55f69fa56da..40712988cd3 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1126,7 +1126,7 @@ BuildIndexInfo(Relation index)
 
 	/* other info */
 	ii->ii_Unique = indexStruct->indisunique;
-	ii->ii_ReadyForInserts = indexStruct->indisready;
+	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
 
 	/* initialize index-build state to default */
 	ii->ii_Concurrent = false;
@@ -1465,8 +1465,20 @@ index_build(Relation heapRelation,
 	 * index's usability horizon.  Moreover, we *must not* try to change
 	 * the index's pg_index entry while reindexing pg_index itself, and this
 	 * optimization nicely prevents that.
-	 */
-	if (indexInfo->ii_BrokenHotChain && !isreindex)
+	 *
+	 * We also need not set indcheckxmin during a concurrent index build,
+	 * because we won't set indisvalid true until all transactions that care
+	 * about the broken HOT chains are gone.
+	 *
+	 * Therefore, this code path can only be taken during non-concurrent
+	 * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
+	 * tuple's xmin doesn't matter, because that tuple was created in the
+	 * current transaction anyway.	That also means we don't need to worry
+	 * about any concurrent readers of the tuple; no other transaction can see
+	 * it yet.
+	 */
+	if (indexInfo->ii_BrokenHotChain && !isreindex &&
+		!indexInfo->ii_Concurrent)
 	{
 		Oid			indexId = RelationGetRelid(indexRelation);
 		Relation	pg_index;
@@ -2409,6 +2421,65 @@ validate_index_heapscan(Relation heapRelation,
 
 
 /*
+ * index_set_state_flags - adjust pg_index state flags
+ *
+ * This is used during CREATE INDEX CONCURRENTLY to adjust the pg_index
+ * flags that denote the index's state.  We must use an in-place update of
+ * the pg_index tuple, because we do not have exclusive lock on the parent
+ * table and so other sessions might concurrently be doing SnapshotNow scans
+ * of pg_index to identify the table's indexes.  A transactional update would
+ * risk somebody not seeing the index at all.  Because the update is not
+ * transactional and will not roll back on error, this must only be used as
+ * the last step in a transaction that has not made any transactional catalog
+ * updates!
+ *
+ * Note that heap_inplace_update does send a cache inval message for the
+ * tuple, so other sessions will hear about the update as soon as we commit.
+ */
+void
+index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
+{
+	Relation	pg_index;
+	HeapTuple	indexTuple;
+	Form_pg_index indexForm;
+
+	/* Assert that current xact hasn't done any transactional updates */
+	Assert(GetTopTransactionIdIfAny() == InvalidTransactionId);
+
+	/* Open pg_index and fetch a writable copy of the index's tuple */
+	pg_index = heap_open(IndexRelationId, RowExclusiveLock);
+
+	indexTuple = SearchSysCacheCopy1(INDEXRELID,
+									 ObjectIdGetDatum(indexId));
+	if (!HeapTupleIsValid(indexTuple))
+		elog(ERROR, "cache lookup failed for index %u", indexId);
+	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+	/* Perform the requested state change on the copy */
+	switch (action)
+	{
+		case INDEX_CREATE_SET_READY:
+			/* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
+			Assert(!indexForm->indisready);
+			Assert(!indexForm->indisvalid);
+			indexForm->indisready = true;
+			break;
+		case INDEX_CREATE_SET_VALID:
+			/* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
+			Assert(indexForm->indisready);
+			Assert(!indexForm->indisvalid);
+			indexForm->indisvalid = true;
+			break;
+	}
+
+	/* ... and write it back in-place */
+	heap_inplace_update(pg_index, indexTuple);
+
+	heap_close(pg_index, RowExclusiveLock);
+}
+
+
+/*
  * IndexGetRelation: given an index's relation OID, get the OID of the
  * relation it is an index on.	Uses the system cache.
  */
@@ -2437,12 +2508,9 @@ void
 reindex_index(Oid indexId, bool skip_constraint_checks)
 {
 	Relation	iRel,
-				heapRelation,
-				pg_index;
+				heapRelation;
 	Oid			heapId;
 	IndexInfo  *indexInfo;
-	HeapTuple	indexTuple;
-	Form_pg_index indexForm;
 	volatile bool skipped_constraint = false;
 
 	/*
@@ -2516,25 +2584,39 @@ reindex_index(Oid indexId, bool skip_constraint_checks)
 	 *
 	 * We can also reset indcheckxmin, because we have now done a
 	 * non-concurrent index build, *except* in the case where index_build
-	 * found some still-broken HOT chains.  If it did, we normally leave
-	 * indcheckxmin alone (note that index_build won't have changed it,
-	 * because this is a reindex).  But if the index was invalid or not ready
-	 * and there were broken HOT chains, it seems best to force indcheckxmin
-	 * true, because the normal argument that the HOT chains couldn't conflict
-	 * with the index is suspect for an invalid index.
+	 * found some still-broken HOT chains. If it did, and we don't have to
+	 * change any of the other flags, we just leave indcheckxmin alone (note
+	 * that index_build won't have changed it, because this is a reindex).
+	 * This is okay and desirable because not updating the tuple leaves the
+	 * index's usability horizon (recorded as the tuple's xmin value) the same
+	 * as it was.
 	 *
-	 * Note that it is important to not update the pg_index entry if we don't
-	 * have to, because updating it will move the index's usability horizon
-	 * (recorded as the tuple's xmin value) if indcheckxmin is true.  We don't
-	 * really want REINDEX to move the usability horizon forward ever, but we
-	 * have no choice if we are to fix indisvalid or indisready.  Of course,
-	 * clearing indcheckxmin eliminates the issue, so we're happy to do that
-	 * if we can.  Another reason for caution here is that while reindexing
-	 * pg_index itself, we must not try to update it.  We assume that
-	 * pg_index's indexes will always have these flags in their clean state.
+	 * But, if the index was invalid/not-ready and there were broken HOT
+	 * chains, we had better force indcheckxmin true, because the normal
+	 * argument that the HOT chains couldn't conflict with the index is
+	 * suspect for an invalid index.  In this case advancing the usability
+	 * horizon is appropriate.
+	 *
+	 * Note that if we have to update the tuple, there is a risk of concurrent
+	 * transactions not seeing it during their SnapshotNow scans of pg_index.
+	 * While not especially desirable, this is safe because no such
+	 * transaction could be trying to update the table (since we have
+	 * ShareLock on it).  The worst case is that someone might transiently
+	 * fail to use the index for a query --- but it was probably unusable
+	 * before anyway, if we are updating the tuple.
+	 *
+	 * Another reason for avoiding unnecessary updates here is that while
+	 * reindexing pg_index itself, we must not try to update tuples in it.
+	 * pg_index's indexes should always have these flags in their clean state,
+	 * so that won't happen.
 	 */
 	if (!skipped_constraint)
 	{
+		Relation	pg_index;
+		HeapTuple	indexTuple;
+		Form_pg_index indexForm;
+		bool		index_bad;
+
 		pg_index = heap_open(IndexRelationId, RowExclusiveLock);
 
 		indexTuple = SearchSysCacheCopy1(INDEXRELID,
@@ -2543,17 +2625,28 @@ reindex_index(Oid indexId, bool skip_constraint_checks)
 			elog(ERROR, "cache lookup failed for index %u", indexId);
 		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
 
-		if (!indexForm->indisvalid || !indexForm->indisready ||
+		index_bad = (!indexForm->indisvalid ||
+					 !indexForm->indisready);
+		if (index_bad ||
 			(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
 		{
 			if (!indexInfo->ii_BrokenHotChain)
 				indexForm->indcheckxmin = false;
-			else if (!indexForm->indisvalid || !indexForm->indisready)
+			else if (index_bad)
 				indexForm->indcheckxmin = true;
 			indexForm->indisvalid = true;
 			indexForm->indisready = true;
 			simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
 			CatalogUpdateIndexes(pg_index, indexTuple);
+
+			/*
+			 * Invalidate the relcache for the table, so that after we commit
+			 * all sessions will refresh the table's index list.  This ensures
+			 * that if anyone misses seeing the pg_index row during this
+			 * update, they'll refresh their list before attempting any update
+			 * on the table.
+			 */
+			CacheInvalidateRelcache(heapRelation);
 		}
 
 		heap_close(pg_index, RowExclusiveLock);
author	Tom Lane <tgl@sss.pgh.pa.us>	2012-11-29 14:50:39 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2012-11-29 14:52:07 -0500
commit	1dbd02dc37efb74b770aa834b2cb65dae2446640 (patch)
tree	b2a6dd6301b09fb40e99f978017e3337c85e3595 /src/backend/catalog/index.c
parent	3dfdf28152eb2df7cb4d4e43f461903fcc09e1d2 (diff)