diff options
Diffstat (limited to 'src/backend/commands')
-rw-r--r-- | src/backend/commands/cluster.c | 29 | ||||
-rw-r--r-- | src/backend/commands/copy.c | 52 | ||||
-rw-r--r-- | src/backend/commands/createas.c | 15 | ||||
-rw-r--r-- | src/backend/commands/matview.c | 16 | ||||
-rw-r--r-- | src/backend/commands/tablecmds.c | 56 |
5 files changed, 61 insertions, 107 deletions
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 43b8f2b3042..c1b5cd4fd40 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -747,7 +747,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *isnull; IndexScanDesc indexScan; HeapScanDesc heapScan; - bool use_wal; bool is_system_catalog; TransactionId OldestXmin; TransactionId FreezeXid; @@ -803,12 +802,9 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock); /* - * We need to log the copied data in WAL iff WAL archiving/streaming is - * enabled AND it's a WAL-logged rel. + * Valid smgr_targblock implies something already wrote to the relation. + * This may be harmless, but this function hasn't planned for it. */ - use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap); - - /* use_wal off requires smgr_targblock be initially invalid */ Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber); /* @@ -876,7 +872,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, /* Initialize the rewrite operation */ rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid, - MultiXactCutoff, use_wal); + MultiXactCutoff); /* * Decide whether to use an indexscan or seqscan-and-optional-sort to scan @@ -1247,6 +1243,25 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, } /* + * Recognize that rel1's relfilenode (swapped from rel2) is new in this + * subtransaction. The rel2 storage (swapped from rel1) may or may not be + * new. + */ + { + Relation rel1, + rel2; + + rel1 = relation_open(r1, NoLock); + rel2 = relation_open(r2, NoLock); + rel2->rd_createSubid = rel1->rd_createSubid; + rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid; + rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid; + RelationAssumeNewRelfilenode(rel1); + relation_close(rel1, NoLock); + relation_close(rel2, NoLock); + } + + /* * In the case of a shared catalog, these next few steps will only affect * our own database's pg_class row; but that's okay, because they are all * noncritical updates. That's also an important fact for the case of a diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 92fee9a497b..4ff4d5fdd24 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2310,49 +2310,14 @@ CopyFrom(CopyState cstate) tupDesc = RelationGetDescr(cstate->rel); - /*---------- - * Check to see if we can avoid writing WAL - * - * If archive logging/streaming is not enabled *and* either - * - table was created in same transaction as this COPY - * - data is being written to relfilenode created in this transaction - * then we can skip writing WAL. It's safe because if the transaction - * doesn't commit, we'll discard the table (or the new relfilenode file). - * If it does commit, we'll have done the heap_sync at the bottom of this - * routine first. - * - * As mentioned in comments in utils/rel.h, the in-same-transaction test - * is not always set correctly, since in rare cases rd_newRelfilenodeSubid - * can be cleared before the end of the transaction. The exact case is - * when a relation sets a new relfilenode twice in same transaction, yet - * the second one fails in an aborted subtransaction, e.g. - * - * BEGIN; - * TRUNCATE t; - * SAVEPOINT save; - * TRUNCATE t; - * ROLLBACK TO save; - * COPY ... - * - * Also, if the target file is new-in-transaction, we assume that checking - * FSM for free space is a waste of time, even if we must use WAL because - * of archiving. This could possibly be wrong, but it's unlikely. - * - * The comments for heap_insert and RelationGetBufferForTuple specify that - * skipping WAL logging is only safe if we ensure that our tuples do not - * go into pages containing tuples from any other transactions --- but this - * must be the case if we have a new table or new relfilenode, so we need - * no additional work to enforce that. - *---------- + /* + * If the target file is new-in-transaction, we assume that checking FSM + * for free space is a waste of time. This could possibly be wrong, but + * it's unlikely. */ - /* createSubid is creation check, newRelfilenodeSubid is truncation check */ if (cstate->rel->rd_createSubid != InvalidSubTransactionId || - cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId) - { + cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId) hi_options |= HEAP_INSERT_SKIP_FSM; - if (!XLogIsNeeded()) - hi_options |= HEAP_INSERT_SKIP_WAL; - } /* * Optimize if new relfilenode was created in this subxact or one of its @@ -2611,13 +2576,6 @@ CopyFrom(CopyState cstate) FreeExecutorState(estate); - /* - * If we skipped writing WAL, then we need to sync the heap (but not - * indexes since those use WAL anyway) - */ - if (hi_options & HEAP_INSERT_SKIP_WAL) - heap_sync(cstate->rel); - return processed; } diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 5b4f6affcce..effcd8754fc 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -562,16 +562,13 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) myState->rel = intoRelationDesc; myState->reladdr = intoRelationAddr; myState->output_cid = GetCurrentCommandId(true); + myState->hi_options = HEAP_INSERT_SKIP_FSM; + myState->bistate = GetBulkInsertState(); /* - * We can skip WAL-logging the insertions, unless PITR or streaming - * replication is in use. We can skip the FSM in any case. + * Valid smgr_targblock implies something already wrote to the relation. + * This may be harmless, but this function hasn't planned for it. */ - myState->hi_options = HEAP_INSERT_SKIP_FSM | - (XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL); - myState->bistate = GetBulkInsertState(); - - /* Not using WAL requires smgr_targblock be initially invalid */ Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); } @@ -617,10 +614,6 @@ intorel_shutdown(DestReceiver *self) FreeBulkInsertState(myState->bistate); - /* If we skipped using WAL, must heap_sync before commit */ - if (myState->hi_options & HEAP_INSERT_SKIP_WAL) - heap_sync(myState->rel); - /* close rel, but keep lock until commit */ heap_close(myState->rel, NoLock); myState->rel = NULL; diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index b73026ffa29..14eab0105f9 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -436,17 +436,13 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) */ myState->transientrel = transientrel; myState->output_cid = GetCurrentCommandId(true); - - /* - * We can skip WAL-logging the insertions, unless PITR or streaming - * replication is in use. We can skip the FSM in any case. - */ myState->hi_options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN; - if (!XLogIsNeeded()) - myState->hi_options |= HEAP_INSERT_SKIP_WAL; myState->bistate = GetBulkInsertState(); - /* Not using WAL requires smgr_targblock be initially invalid */ + /* + * Valid smgr_targblock implies something already wrote to the relation. + * This may be harmless, but this function hasn't planned for it. + */ Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber); } @@ -486,10 +482,6 @@ transientrel_shutdown(DestReceiver *self) FreeBulkInsertState(myState->bistate); - /* If we skipped using WAL, must heap_sync before commit */ - if (myState->hi_options & HEAP_INSERT_SKIP_WAL) - heap_sync(myState->transientrel); - /* close transientrel, but keep lock until commit */ heap_close(myState->transientrel, NoLock); myState->transientrel = NULL; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 03720887e70..c6e9567fc29 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -4021,19 +4021,14 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) newrel = NULL; /* - * Prepare a BulkInsertState and options for heap_insert. Because we're - * building a new heap, we can skip WAL-logging and fsync it to disk at - * the end instead (unless WAL-logging is required for archiving or - * streaming replication). The FSM is empty too, so don't bother using it. + * Prepare a BulkInsertState and options for heap_insert. The FSM is + * empty, so don't bother using it. */ if (newrel) { mycid = GetCurrentCommandId(true); bistate = GetBulkInsertState(); - hi_options = HEAP_INSERT_SKIP_FSM; - if (!XLogIsNeeded()) - hi_options |= HEAP_INSERT_SKIP_WAL; } else { @@ -4283,10 +4278,6 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) { FreeBulkInsertState(bistate); - /* If we skipped writing WAL, then we need to sync the heap. */ - if (hi_options & HEAP_INSERT_SKIP_WAL) - heap_sync(newrel); - heap_close(newrel, NoLock); } } @@ -5988,14 +5979,19 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel, /* * If TryReuseIndex() stashed a relfilenode for us, we used it for the new - * index instead of building from scratch. The DROP of the old edition of - * this index will have scheduled the storage for deletion at commit, so - * cancel that pending deletion. + * index instead of building from scratch. Restore associated fields. + * This may store InvalidSubTransactionId in both fields, in which case + * relcache.c will assume it can rebuild the relcache entry. Hence, do + * this after the CCI that made catalog rows visible to any rebuild. The + * DROP of the old edition of this index will have scheduled the storage + * for deletion at commit, so cancel that pending deletion. */ if (OidIsValid(stmt->oldNode)) { Relation irel = index_open(address.objectId, NoLock); + irel->rd_createSubid = stmt->oldCreateSubid; + irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid; RelationPreserveStorage(irel->rd_node, true); index_close(irel, NoLock); } @@ -9134,6 +9130,8 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt) Relation irel = index_open(oldId, NoLock); stmt->oldNode = irel->rd_node.relNode; + stmt->oldCreateSubid = irel->rd_createSubid; + stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid; index_close(irel, NoLock); } } @@ -9979,6 +9977,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) heap_close(pg_class, RowExclusiveLock); + RelationAssumeNewRelfilenode(rel); + relation_close(rel, NoLock); /* Make sure the reltablespace change is visible */ @@ -10193,7 +10193,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, /* * We need to log the copied data in WAL iff WAL archiving/streaming is - * enabled AND it's a permanent relation. + * enabled AND it's a permanent relation. This gives the same answer as + * "RelationNeedsWAL(rel) || copying_initfork", because we know the + * current operation created a new relfilenode. */ use_wal = XLogIsNeeded() && (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork); @@ -10235,21 +10237,15 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, } /* - * If the rel is WAL-logged, must fsync before commit. We use heap_sync - * to ensure that the toast table gets fsync'd too. (For a temp or - * unlogged rel we don't care since the data will be gone after a crash - * anyway.) - * - * It's obvious that we must do this when not WAL-logging the copy. It's - * less obvious that we have to do it even if we did WAL-log the copied - * pages. The reason is that since we're copying outside shared buffers, a - * CHECKPOINT occurring during the copy has no way to flush the previously - * written data to disk (indeed it won't know the new rel even exists). A - * crash later on would replay WAL from the checkpoint, therefore it - * wouldn't replay our earlier WAL entries. If we do not fsync those pages - * here, they might still not be on disk when the crash occurs. - */ - if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork) + * When we WAL-logged rel pages, we must nonetheless fsync them. The + * reason is that since we're copying outside shared buffers, a CHECKPOINT + * occurring during the copy has no way to flush the previously written + * data to disk (indeed it won't know the new rel even exists). A crash + * later on would replay WAL from the checkpoint, therefore it wouldn't + * replay our earlier WAL entries. If we do not fsync those pages here, + * they might still not be on disk when the crash occurs. + */ + if (use_wal || copying_initfork) smgrimmedsync(dst, forkNum); } |