diff options
Diffstat (limited to 'src/backend/utils/cache/relcache.c')
-rw-r--r-- | src/backend/utils/cache/relcache.c | 268 |
1 files changed, 216 insertions, 52 deletions
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 76f41dbe36c..9ee9dc8cc0c 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -257,6 +257,9 @@ static void RelationReloadIndexInfo(Relation relation); static void RelationReloadNailed(Relation relation); static void RelationFlushRelation(Relation relation); static void RememberToFreeTupleDescAtEOX(TupleDesc td); +#ifdef USE_ASSERT_CHECKING +static void AssertPendingSyncConsistency(Relation relation); +#endif static void AtEOXact_cleanup(Relation relation, bool isCommit); static void AtEOSubXact_cleanup(Relation relation, bool isCommit, SubTransactionId mySubid, SubTransactionId parentSubid); @@ -1093,6 +1096,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_isnailed = false; relation->rd_createSubid = InvalidSubTransactionId; relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_droppedSubid = InvalidSubTransactionId; switch (relation->rd_rel->relpersistence) { case RELPERSISTENCE_UNLOGGED: @@ -1817,6 +1822,8 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_isnailed = true; relation->rd_createSubid = InvalidSubTransactionId; relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_droppedSubid = InvalidSubTransactionId; relation->rd_backend = InvalidBackendId; relation->rd_islocaltemp = false; @@ -1989,6 +1996,13 @@ RelationIdGetRelation(Oid relationId) if (RelationIsValid(rd)) { + /* return NULL for dropped relations */ + if (rd->rd_droppedSubid != InvalidSubTransactionId) + { + Assert(!rd->rd_isvalid); + return NULL; + } + RelationIncrementReferenceCount(rd); /* revalidate cache entry if necessary */ if (!rd->rd_isvalid) @@ -2092,7 +2106,7 @@ RelationClose(Relation relation) #ifdef RELCACHE_FORCE_RELEASE if (RelationHasReferenceCountZero(relation) && relation->rd_createSubid == InvalidSubTransactionId && - relation->rd_newRelfilenodeSubid == InvalidSubTransactionId) + relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId) RelationClearRelation(relation, false); #endif } @@ -2131,10 +2145,11 @@ RelationReloadIndexInfo(Relation relation) HeapTuple pg_class_tuple; Form_pg_class relp; - /* Should be called only for invalidated indexes */ + /* Should be called only for invalidated, live indexes */ Assert((relation->rd_rel->relkind == RELKIND_INDEX || relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) && - !relation->rd_isvalid); + !relation->rd_isvalid && + relation->rd_droppedSubid == InvalidSubTransactionId); /* Ensure it's closed at smgr level */ RelationCloseSmgr(relation); @@ -2430,6 +2445,13 @@ RelationClearRelation(Relation relation, bool rebuild) return; } + /* Mark it invalid until we've finished rebuild */ + relation->rd_isvalid = false; + + /* See RelationForgetRelation(). */ + if (relation->rd_droppedSubid != InvalidSubTransactionId) + return; + /* * Even non-system indexes should not be blown away if they are open and * have valid index support information. This avoids problems with active @@ -2442,15 +2464,11 @@ RelationClearRelation(Relation relation, bool rebuild) relation->rd_refcnt > 0 && relation->rd_indexcxt != NULL) { - relation->rd_isvalid = false; /* needs to be revalidated */ if (IsTransactionState()) RelationReloadIndexInfo(relation); return; } - /* Mark it invalid until we've finished rebuild */ - relation->rd_isvalid = false; - /* * If we're really done with the relcache entry, blow it away. But if * someone is still using it, reconstruct the whole deal without moving @@ -2508,13 +2526,13 @@ RelationClearRelation(Relation relation, bool rebuild) * problem. * * When rebuilding an open relcache entry, we must preserve ref count, - * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also - * attempt to preserve the pg_class entry (rd_rel), tupledesc, - * rewrite-rule, partition key, and partition descriptor substructures - * in place, because various places assume that these structures won't - * move while they are working with an open relcache entry. (Note: - * the refcount mechanism for tupledescs might someday allow us to - * remove this hack for the tupledesc.) + * rd_*Subid, and rd_toastoid state. Also attempt to preserve the + * pg_class entry (rd_rel), tupledesc, rewrite-rule, partition key, + * and partition descriptor substructures in place, because various + * places assume that these structures won't move while they are + * working with an open relcache entry. (Note: the refcount + * mechanism for tupledescs might someday allow us to remove this hack + * for the tupledesc.) * * Note that this process does not touch CurrentResourceOwner; which * is good because whatever ref counts the entry may have do not @@ -2594,6 +2612,8 @@ RelationClearRelation(Relation relation, bool rebuild) /* creation sub-XIDs must be preserved */ SWAPFIELD(SubTransactionId, rd_createSubid); SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid); + SWAPFIELD(SubTransactionId, rd_firstRelfilenodeSubid); + SWAPFIELD(SubTransactionId, rd_droppedSubid); /* un-swap rd_rel pointers, swap contents instead */ SWAPFIELD(Form_pg_class, rd_rel); /* ... but actually, we don't have to update newrel->rd_rel */ @@ -2672,12 +2692,12 @@ static void RelationFlushRelation(Relation relation) { if (relation->rd_createSubid != InvalidSubTransactionId || - relation->rd_newRelfilenodeSubid != InvalidSubTransactionId) + relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId) { /* * New relcache entries are always rebuilt, not flushed; else we'd - * forget the "new" status of the relation, which is a useful - * optimization to have. Ditto for the new-relfilenode status. + * forget the "new" status of the relation. Ditto for the + * new-relfilenode status. * * The rel could have zero refcnt here, so temporarily increment the * refcnt to ensure it's safe to rebuild it. We can assume that the @@ -2699,10 +2719,7 @@ RelationFlushRelation(Relation relation) } /* - * RelationForgetRelation - unconditionally remove a relcache entry - * - * External interface for destroying a relcache entry when we - * drop the relation. + * RelationForgetRelation - caller reports that it dropped the relation */ void RelationForgetRelation(Oid rid) @@ -2717,7 +2734,19 @@ RelationForgetRelation(Oid rid) if (!RelationHasReferenceCountZero(relation)) elog(ERROR, "relation %u is still open", rid); - /* Unconditionally destroy the relcache entry */ + Assert(relation->rd_droppedSubid == InvalidSubTransactionId); + if (relation->rd_createSubid != InvalidSubTransactionId || + relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId) + { + /* + * In the event of subtransaction rollback, we must not forget + * rd_*Subid. Mark the entry "dropped" so RelationClearRelation() + * invalidates it in lieu of destroying it. (If we're in a top + * transaction, we could opt to destroy the entry.) + */ + relation->rd_droppedSubid = GetCurrentSubTransactionId(); + } + RelationClearRelation(relation, false); } @@ -2757,11 +2786,10 @@ RelationCacheInvalidateEntry(Oid relationId) * relation cache and re-read relation mapping data. * * This is currently used only to recover from SI message buffer overflow, - * so we do not touch new-in-transaction relations; they cannot be targets - * of cross-backend SI updates (and our own updates now go through a - * separate linked list that isn't limited by the SI message buffer size). - * Likewise, we need not discard new-relfilenode-in-transaction hints, - * since any invalidation of those would be a local event. + * so we do not touch relations having new-in-transaction relfilenodes; they + * cannot be targets of cross-backend SI updates (and our own updates now go + * through a separate linked list that isn't limited by the SI message + * buffer size). * * We do this in two phases: the first pass deletes deletable items, and * the second one rebuilds the rebuildable items. This is essential for @@ -2812,7 +2840,7 @@ RelationCacheInvalidate(void) * pending invalidations. */ if (relation->rd_createSubid != InvalidSubTransactionId || - relation->rd_newRelfilenodeSubid != InvalidSubTransactionId) + relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId) continue; relcacheInvalsReceived++; @@ -2924,6 +2952,84 @@ RememberToFreeTupleDescAtEOX(TupleDesc td) EOXactTupleDescArray[NextEOXactTupleDescNum++] = td; } +#ifdef USE_ASSERT_CHECKING +static void +AssertPendingSyncConsistency(Relation relation) +{ + bool relcache_verdict = + relation->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT && + ((relation->rd_createSubid != InvalidSubTransactionId && + RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) || + relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId); + + Assert(relcache_verdict == RelFileNodeSkippingWAL(relation->rd_node)); + + if (relation->rd_droppedSubid != InvalidSubTransactionId) + Assert(!relation->rd_isvalid && + (relation->rd_createSubid != InvalidSubTransactionId || + relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)); +} + +/* + * AssertPendingSyncs_RelationCache + * + * Assert that relcache.c and storage.c agree on whether to skip WAL. + */ +void +AssertPendingSyncs_RelationCache(void) +{ + HASH_SEQ_STATUS status; + LOCALLOCK *locallock; + Relation *rels; + int maxrels; + int nrels; + RelIdCacheEnt *idhentry; + int i; + + /* + * Open every relation that this transaction has locked. If, for some + * relation, storage.c is skipping WAL and relcache.c is not skipping WAL, + * a CommandCounterIncrement() typically yields a local invalidation + * message that destroys the relcache entry. By recreating such entries + * here, we detect the problem. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + maxrels = 1; + rels = palloc(maxrels * sizeof(*rels)); + nrels = 0; + hash_seq_init(&status, GetLockMethodLocalHash()); + while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL) + { + Oid relid; + Relation r; + + if (locallock->nLocks <= 0) + continue; + if ((LockTagType) locallock->tag.lock.locktag_type != + LOCKTAG_RELATION) + continue; + relid = ObjectIdGetDatum(locallock->tag.lock.locktag_field2); + r = RelationIdGetRelation(relid); + if (!RelationIsValid(r)) + continue; + if (nrels >= maxrels) + { + maxrels *= 2; + rels = repalloc(rels, maxrels * sizeof(*rels)); + } + rels[nrels++] = r; + } + + hash_seq_init(&status, RelationIdCache); + while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) + AssertPendingSyncConsistency(idhentry->reldesc); + + for (i = 0; i < nrels; i++) + RelationClose(rels[i]); + PopActiveSnapshot(); +} +#endif + /* * AtEOXact_RelationCache * @@ -3006,6 +3112,8 @@ AtEOXact_RelationCache(bool isCommit) static void AtEOXact_cleanup(Relation relation, bool isCommit) { + bool clear_relcache = false; + /* * The relcache entry's ref count should be back to its normal * not-in-a-transaction state: 0 unless it's nailed in cache. @@ -3031,17 +3139,31 @@ AtEOXact_cleanup(Relation relation, bool isCommit) #endif /* - * Is it a relation created in the current transaction? + * Is the relation live after this transaction ends? * - * During commit, reset the flag to zero, since we are now out of the - * creating transaction. During abort, simply delete the relcache entry - * --- it isn't interesting any longer. + * During commit, clear the relcache entry if it is preserved after + * relation drop, in order not to orphan the entry. During rollback, + * clear the relcache entry if the relation is created in the current + * transaction since it isn't interesting any longer once we are out of + * the transaction. + */ + clear_relcache = + (isCommit ? + relation->rd_droppedSubid != InvalidSubTransactionId : + relation->rd_createSubid != InvalidSubTransactionId); + + /* + * Since we are now out of the transaction, reset the subids to zero. + * That also lets RelationClearRelation() drop the relcache entry. */ - if (relation->rd_createSubid != InvalidSubTransactionId) + relation->rd_createSubid = InvalidSubTransactionId; + relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_droppedSubid = InvalidSubTransactionId; + + if (clear_relcache) { - if (isCommit) - relation->rd_createSubid = InvalidSubTransactionId; - else if (RelationHasReferenceCountZero(relation)) + if (RelationHasReferenceCountZero(relation)) { RelationClearRelation(relation, false); return; @@ -3056,16 +3178,10 @@ AtEOXact_cleanup(Relation relation, bool isCommit) * eventually. This must be just a WARNING to avoid * error-during-error-recovery loops. */ - relation->rd_createSubid = InvalidSubTransactionId; elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount", RelationGetRelationName(relation)); } } - - /* - * Likewise, reset the hint about the relfilenode being new. - */ - relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; } /* @@ -3129,15 +3245,28 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit, /* * Is it a relation created in the current subtransaction? * - * During subcommit, mark it as belonging to the parent, instead. During - * subabort, simply delete the relcache entry. + * During subcommit, mark it as belonging to the parent, instead, as long + * as it has not been dropped. Otherwise simply delete the relcache entry. + * --- it isn't interesting any longer. */ if (relation->rd_createSubid == mySubid) { - if (isCommit) + /* + * Valid rd_droppedSubid means the corresponding relation is dropped + * but the relcache entry is preserved for at-commit pending sync. We + * need to drop it explicitly here not to make the entry orphan. + */ + Assert(relation->rd_droppedSubid == mySubid || + relation->rd_droppedSubid == InvalidSubTransactionId); + if (isCommit && relation->rd_droppedSubid == InvalidSubTransactionId) relation->rd_createSubid = parentSubid; else if (RelationHasReferenceCountZero(relation)) { + /* allow the entry to be removed */ + relation->rd_createSubid = InvalidSubTransactionId; + relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_droppedSubid = InvalidSubTransactionId; RelationClearRelation(relation, false); return; } @@ -3157,7 +3286,8 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit, } /* - * Likewise, update or drop any new-relfilenode-in-subtransaction hint. + * Likewise, update or drop any new-relfilenode-in-subtransaction record + * or drop record. */ if (relation->rd_newRelfilenodeSubid == mySubid) { @@ -3166,6 +3296,22 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit, else relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; } + + if (relation->rd_firstRelfilenodeSubid == mySubid) + { + if (isCommit) + relation->rd_firstRelfilenodeSubid = parentSubid; + else + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + } + + if (relation->rd_droppedSubid == mySubid) + { + if (isCommit) + relation->rd_droppedSubid = parentSubid; + else + relation->rd_droppedSubid = InvalidSubTransactionId; + } } @@ -3255,6 +3401,7 @@ RelationBuildLocalRelation(const char *relname, /* it's being created in this transaction */ rel->rd_createSubid = GetCurrentSubTransactionId(); rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; + rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId; /* * create a new tuple descriptor from the one passed in. We do this @@ -3552,14 +3699,29 @@ RelationSetNewRelfilenode(Relation relation, char persistence) */ CommandCounterIncrement(); - /* - * Mark the rel as having been given a new relfilenode in the current - * (sub) transaction. This is a hint that can be used to optimize later - * operations on the rel in the same transaction. - */ + RelationAssumeNewRelfilenode(relation); +} + +/* + * RelationAssumeNewRelfilenode + * + * Code that modifies pg_class.reltablespace or pg_class.relfilenode must call + * this. The call shall precede any code that might insert WAL records whose + * replay would modify bytes in the new RelFileNode, and the call shall follow + * any WAL modifying bytes in the prior RelFileNode. See struct RelationData. + * Ideally, call this as near as possible to the CommandCounterIncrement() + * that makes the pg_class change visible (before it or after it); that + * minimizes the chance of future development adding a forbidden WAL insertion + * between RelationAssumeNewRelfilenode() and CommandCounterIncrement(). + */ +void +RelationAssumeNewRelfilenode(Relation relation) +{ relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId(); + if (relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId) + relation->rd_firstRelfilenodeSubid = relation->rd_newRelfilenodeSubid; - /* Flag relation as needing eoxact cleanup (to remove the hint) */ + /* Flag relation as needing eoxact cleanup (to clear these fields) */ EOXactListAdd(relation); } @@ -5625,6 +5787,8 @@ load_relcache_init_file(bool shared) rel->rd_fkeylist = NIL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; + rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + rel->rd_droppedSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info)); |