summary | refs | log | tree | commit | diff
path: root/src/backend/commands
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/commands')
-rw-r--r-- src/backend/commands/cluster.c 29
-rw-r--r-- src/backend/commands/copy.c 52
-rw-r--r-- src/backend/commands/createas.c 15
-rw-r--r-- src/backend/commands/matview.c 16
-rw-r--r-- src/backend/commands/tablecmds.c 56
5 files changed, 61 insertions, 107 deletions
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 43b8f2b3042..c1b5cd4fd40 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -747,7 +747,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
bool *isnull;
IndexScanDesc indexScan;
HeapScanDesc heapScan;
- bool use_wal;
bool is_system_catalog;
TransactionId OldestXmin;
TransactionId FreezeXid;
@@ -803,12 +802,9 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
/*
- * We need to log the copied data in WAL iff WAL archiving/streaming is
- * enabled AND it's a WAL-logged rel.
+ * Valid smgr_targblock implies something already wrote to the relation.
+ * This may be harmless, but this function hasn't planned for it.
*/
- use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
-
- /* use_wal off requires smgr_targblock be initially invalid */
Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
/*
@@ -876,7 +872,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
/* Initialize the rewrite operation */
rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
- MultiXactCutoff, use_wal);
+ MultiXactCutoff);
/*
* Decide whether to use an indexscan or seqscan-and-optional-sort to scan
@@ -1247,6 +1243,25 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
}
/*
+ * Recognize that rel1's relfilenode (swapped from rel2) is new in this
+ * subtransaction. The rel2 storage (swapped from rel1) may or may not be
+ * new.
+ */
+ {
+ Relation rel1,
+ rel2;
+
+ rel1 = relation_open(r1, NoLock);
+ rel2 = relation_open(r2, NoLock);
+ rel2->rd_createSubid = rel1->rd_createSubid;
+ rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
+ rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
+ RelationAssumeNewRelfilenode(rel1);
+ relation_close(rel1, NoLock);
+ relation_close(rel2, NoLock);
+ }
+
+ /*
* In the case of a shared catalog, these next few steps will only affect
* our own database's pg_class row; but that's okay, because they are all
* noncritical updates. That's also an important fact for the case of a
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 92fee9a497b..4ff4d5fdd24 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2310,49 +2310,14 @@ CopyFrom(CopyState cstate)
tupDesc = RelationGetDescr(cstate->rel);
- /*----------
- * Check to see if we can avoid writing WAL
- *
- * If archive logging/streaming is not enabled *and* either
- * - table was created in same transaction as this COPY
- * - data is being written to relfilenode created in this transaction
- * then we can skip writing WAL. It's safe because if the transaction
- * doesn't commit, we'll discard the table (or the new relfilenode file).
- * If it does commit, we'll have done the heap_sync at the bottom of this
- * routine first.
- *
- * As mentioned in comments in utils/rel.h, the in-same-transaction test
- * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
- * can be cleared before the end of the transaction. The exact case is
- * when a relation sets a new relfilenode twice in same transaction, yet
- * the second one fails in an aborted subtransaction, e.g.
- *
- * BEGIN;
- * TRUNCATE t;
- * SAVEPOINT save;
- * TRUNCATE t;
- * ROLLBACK TO save;
- * COPY ...
- *
- * Also, if the target file is new-in-transaction, we assume that checking
- * FSM for free space is a waste of time, even if we must use WAL because
- * of archiving. This could possibly be wrong, but it's unlikely.
- *
- * The comments for heap_insert and RelationGetBufferForTuple specify that
- * skipping WAL logging is only safe if we ensure that our tuples do not
- * go into pages containing tuples from any other transactions --- but this
- * must be the case if we have a new table or new relfilenode, so we need
- * no additional work to enforce that.
- *----------
+ /*
+ * If the target file is new-in-transaction, we assume that checking FSM
+ * for free space is a waste of time. This could possibly be wrong, but
+ * it's unlikely.
*/
- /* createSubid is creation check, newRelfilenodeSubid is truncation check */
if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
- cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
- {
+ cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
hi_options |= HEAP_INSERT_SKIP_FSM;
- if (!XLogIsNeeded())
- hi_options |= HEAP_INSERT_SKIP_WAL;
- }
/*
* Optimize if new relfilenode was created in this subxact or one of its
@@ -2611,13 +2576,6 @@ CopyFrom(CopyState cstate)
FreeExecutorState(estate);
- /*
- * If we skipped writing WAL, then we need to sync the heap (but not
- * indexes since those use WAL anyway)
- */
- if (hi_options & HEAP_INSERT_SKIP_WAL)
- heap_sync(cstate->rel);
-
return processed;
}
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
index 5b4f6affcce..effcd8754fc 100644
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -562,16 +562,13 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
myState->rel = intoRelationDesc;
myState->reladdr = intoRelationAddr;
myState->output_cid = GetCurrentCommandId(true);
+ myState->hi_options = HEAP_INSERT_SKIP_FSM;
+ myState->bistate = GetBulkInsertState();
/*
- * We can skip WAL-logging the insertions, unless PITR or streaming
- * replication is in use. We can skip the FSM in any case.
+ * Valid smgr_targblock implies something already wrote to the relation.
+ * This may be harmless, but this function hasn't planned for it.
*/
- myState->hi_options = HEAP_INSERT_SKIP_FSM |
- (XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL);
- myState->bistate = GetBulkInsertState();
-
- /* Not using WAL requires smgr_targblock be initially invalid */
Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
}
@@ -617,10 +614,6 @@ intorel_shutdown(DestReceiver *self)
FreeBulkInsertState(myState->bistate);
- /* If we skipped using WAL, must heap_sync before commit */
- if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
- heap_sync(myState->rel);
-
/* close rel, but keep lock until commit */
heap_close(myState->rel, NoLock);
myState->rel = NULL;
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index b73026ffa29..14eab0105f9 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -436,17 +436,13 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
*/
myState->transientrel = transientrel;
myState->output_cid = GetCurrentCommandId(true);
-
- /*
- * We can skip WAL-logging the insertions, unless PITR or streaming
- * replication is in use. We can skip the FSM in any case.
- */
myState->hi_options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;
- if (!XLogIsNeeded())
- myState->hi_options |= HEAP_INSERT_SKIP_WAL;
myState->bistate = GetBulkInsertState();
- /* Not using WAL requires smgr_targblock be initially invalid */
+ /*
+ * Valid smgr_targblock implies something already wrote to the relation.
+ * This may be harmless, but this function hasn't planned for it.
+ */
Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber);
}
@@ -486,10 +482,6 @@ transientrel_shutdown(DestReceiver *self)
FreeBulkInsertState(myState->bistate);
- /* If we skipped using WAL, must heap_sync before commit */
- if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
- heap_sync(myState->transientrel);
-
/* close transientrel, but keep lock until commit */
heap_close(myState->transientrel, NoLock);
myState->transientrel = NULL;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 03720887e70..c6e9567fc29 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -4021,19 +4021,14 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
newrel = NULL;
/*
- * Prepare a BulkInsertState and options for heap_insert. Because we're
- * building a new heap, we can skip WAL-logging and fsync it to disk at
- * the end instead (unless WAL-logging is required for archiving or
- * streaming replication). The FSM is empty too, so don't bother using it.
+ * Prepare a BulkInsertState and options for heap_insert. The FSM is
+ * empty, so don't bother using it.
*/
if (newrel)
{
mycid = GetCurrentCommandId(true);
bistate = GetBulkInsertState();
-
hi_options = HEAP_INSERT_SKIP_FSM;
- if (!XLogIsNeeded())
- hi_options |= HEAP_INSERT_SKIP_WAL;
}
else
{
@@ -4283,10 +4278,6 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
{
FreeBulkInsertState(bistate);
- /* If we skipped writing WAL, then we need to sync the heap. */
- if (hi_options & HEAP_INSERT_SKIP_WAL)
- heap_sync(newrel);
-
heap_close(newrel, NoLock);
}
}
@@ -5988,14 +5979,19 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
/*
* If TryReuseIndex() stashed a relfilenode for us, we used it for the new
- * index instead of building from scratch. The DROP of the old edition of
- * this index will have scheduled the storage for deletion at commit, so
- * cancel that pending deletion.
+ * index instead of building from scratch. Restore associated fields.
+ * This may store InvalidSubTransactionId in both fields, in which case
+ * relcache.c will assume it can rebuild the relcache entry. Hence, do
+ * this after the CCI that made catalog rows visible to any rebuild. The
+ * DROP of the old edition of this index will have scheduled the storage
+ * for deletion at commit, so cancel that pending deletion.
*/
if (OidIsValid(stmt->oldNode))
{
Relation irel = index_open(address.objectId, NoLock);
+ irel->rd_createSubid = stmt->oldCreateSubid;
+ irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
RelationPreserveStorage(irel->rd_node, true);
index_close(irel, NoLock);
}
@@ -9134,6 +9130,8 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt)
Relation irel = index_open(oldId, NoLock);
stmt->oldNode = irel->rd_node.relNode;
+ stmt->oldCreateSubid = irel->rd_createSubid;
+ stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid;
index_close(irel, NoLock);
}
}
@@ -9979,6 +9977,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
heap_close(pg_class, RowExclusiveLock);
+ RelationAssumeNewRelfilenode(rel);
+
relation_close(rel, NoLock);
/* Make sure the reltablespace change is visible */
@@ -10193,7 +10193,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
/*
* We need to log the copied data in WAL iff WAL archiving/streaming is
- * enabled AND it's a permanent relation.
+ * enabled AND it's a permanent relation. This gives the same answer as
+ * "RelationNeedsWAL(rel) || copying_initfork", because we know the
+ * current operation created a new relfilenode.
*/
use_wal = XLogIsNeeded() &&
(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
@@ -10235,21 +10237,15 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
}
/*
- * If the rel is WAL-logged, must fsync before commit. We use heap_sync
- * to ensure that the toast table gets fsync'd too. (For a temp or
- * unlogged rel we don't care since the data will be gone after a crash
- * anyway.)
- *
- * It's obvious that we must do this when not WAL-logging the copy. It's
- * less obvious that we have to do it even if we did WAL-log the copied
- * pages. The reason is that since we're copying outside shared buffers, a
- * CHECKPOINT occurring during the copy has no way to flush the previously
- * written data to disk (indeed it won't know the new rel even exists). A
- * crash later on would replay WAL from the checkpoint, therefore it
- * wouldn't replay our earlier WAL entries. If we do not fsync those pages
- * here, they might still not be on disk when the crash occurs.
- */
- if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
+ * When we WAL-logged rel pages, we must nonetheless fsync them. The
+ * reason is that since we're copying outside shared buffers, a CHECKPOINT
+ * occurring during the copy has no way to flush the previously written
+ * data to disk (indeed it won't know the new rel even exists). A crash
+ * later on would replay WAL from the checkpoint, therefore it wouldn't
+ * replay our earlier WAL entries. If we do not fsync those pages here,
+ * they might still not be on disk when the crash occurs.
+ */
+ if (use_wal || copying_initfork)
smgrimmedsync(dst, forkNum);
}