Diffstat (limited to 'src/backend')
-rw-r--r--  src/backend/access/heap/heapam.c                   11
-rw-r--r--  src/backend/access/index/indexam.c                 10
-rw-r--r--  src/backend/access/transam/xlog.c                 234
-rw-r--r--  src/backend/catalog/storage.c                       2
-rw-r--r--  src/backend/commands/typecmds.c                    24
-rw-r--r--  src/backend/storage/ipc/shmem.c                     4
-rw-r--r--  src/backend/storage/lmgr/lock.c                     2
-rw-r--r--  src/backend/storage/lmgr/predicate.c                2
-rw-r--r--  src/backend/utils/activity/wait_event_names.txt     2
-rw-r--r--  src/backend/utils/adt/dbsize.c                      3
-rw-r--r--  src/backend/utils/cache/relfilenumbermap.c          8
-rw-r--r--  src/backend/utils/hash/dynahash.c                  92
-rw-r--r--  src/backend/utils/time/snapmgr.c                   19
13 files changed, 161 insertions(+), 252 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 0dcd6ee817e..7491cc3cb93 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1143,6 +1143,17 @@ heap_beginscan(Relation relation, Snapshot snapshot,
if (!(snapshot && IsMVCCSnapshot(snapshot)))
scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
+ /* Check that a historic snapshot is not used for non-catalog tables */
+ if (snapshot &&
+ IsHistoricMVCCSnapshot(snapshot) &&
+ !RelationIsAccessibleInLogicalDecoding(relation))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot query non-catalog table \"%s\" during logical decoding",
+ RelationGetRelationName(relation))));
+ }
+
/*
* For seqscan and sample scans in a serializable transaction, acquire a
* predicate lock on the entire relation. This is required not only to
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 1a4f36fe0a9..86d11f4ec79 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -263,6 +263,16 @@ index_beginscan(Relation heapRelation,
Assert(snapshot != InvalidSnapshot);
+ /* Check that a historic snapshot is not used for non-catalog tables */
+ if (IsHistoricMVCCSnapshot(snapshot) &&
+ !RelationIsAccessibleInLogicalDecoding(heapRelation))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot query non-catalog table \"%s\" during logical decoding",
+ RelationGetRelationName(heapRelation))));
+ }
+
scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false);
/*
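The check added to heap_beginscan() above is duplicated verbatim in index_beginscan(); as a sketch, the two call sites could share one helper (hypothetical name, same calls as in the hunks above):

static void
EnsureHistoricSnapshotOnlyOnCatalogs(Relation relation, Snapshot snapshot)
{
	if (snapshot &&
		IsHistoricMVCCSnapshot(snapshot) &&
		!RelationIsAccessibleInLogicalDecoding(relation))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
				 errmsg("cannot query non-catalog table \"%s\" during logical decoding",
						RelationGetRelationName(relation))));
}
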
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e8909406686..7ffb2179151 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -303,6 +303,11 @@ static bool doPageWrites;
* so it's a plain spinlock. The other locks are held longer (potentially
* over I/O operations), so we use LWLocks for them. These locks are:
*
+ * WALBufMappingLock: must be held to replace a page in the WAL buffer cache.
+ * It is only held while initializing and changing the mapping. If the
+ * contents of the buffer being replaced haven't been written yet, the mapping
+ * lock is released while the write is done, and reacquired afterwards.
+ *
* WALWriteLock: must be held to write WAL buffers to disk (XLogWrite or
* XLogFlush).
*
@@ -468,37 +473,21 @@ typedef struct XLogCtlData
pg_atomic_uint64 logFlushResult; /* last byte + 1 flushed */
/*
- * First initialized page in the cache (first byte position).
- */
- XLogRecPtr InitializedFrom;
-
- /*
- * Latest reserved for initialization page in the cache (last byte
- * position + 1).
+ * Latest initialized page in the cache (last byte position + 1).
*
- * To change the identity of a buffer, you need to advance
- * InitializeReserved first. To change the identity of a buffer that's
+ * To change the identity of a buffer (and InitializedUpTo), you need to
+ * hold WALBufMappingLock. To change the identity of a buffer that's
* still dirty, the old page needs to be written out first, and for that
* you need WALWriteLock, and you need to ensure that there are no
* in-progress insertions to the page by calling
* WaitXLogInsertionsToFinish().
*/
- pg_atomic_uint64 InitializeReserved;
-
- /*
- * Latest initialized page in the cache (last byte position + 1).
- *
- * InitializedUpTo is updated after the buffer initialization. After
- * update, waiters got notification using InitializedUpToCondVar.
- */
- pg_atomic_uint64 InitializedUpTo;
- ConditionVariable InitializedUpToCondVar;
+ XLogRecPtr InitializedUpTo;
/*
* These values do not change after startup, although the pointed-to pages
- * and xlblocks values certainly do. xlblocks values are changed
- * lock-free according to the check for the xlog write position and are
- * accompanied by changes of InitializeReserved and InitializedUpTo.
+ * and xlblocks values certainly do. xlblocks values are protected by
+ * WALBufMappingLock.
*/
char *pages; /* buffers for unwritten XLOG pages */
pg_atomic_uint64 *xlblocks; /* 1st byte ptr-s + XLOG_BLCKSZ */
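With InitializedUpTo demoted to a plain XLogRecPtr, the division of labour is: buffer identity (and InitializedUpTo) changes only while WALBufMappingLock is held, while xlblocks[] stays atomic so readers can still verify a buffer's identity without the lock. A rough sketch of that lock-free check, patterned on GetXLogBuffer() (hypothetical helper name, illustration only):

static bool
WALBufferHoldsPage(int idx, XLogRecPtr ptr)
{
	/* end+1 position of the page that contains 'ptr' */
	XLogRecPtr	expectedEndPtr = ptr - (ptr % XLOG_BLCKSZ) + XLOG_BLCKSZ;

	/* xlblocks[] is atomic, so no WALBufMappingLock is needed here */
	return pg_atomic_read_u64(&XLogCtl->xlblocks[idx]) == expectedEndPtr;
}
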
@@ -821,9 +810,9 @@ XLogInsertRecord(XLogRecData *rdata,
* fullPageWrites from changing until the insertion is finished.
*
* Step 2 can usually be done completely in parallel. If the required WAL
- * page is not initialized yet, you have to go through AdvanceXLInsertBuffer,
- * which will ensure it is initialized. But the WAL writer tries to do that
- * ahead of insertions to avoid that from happening in the critical path.
+ * page is not initialized yet, you have to grab WALBufMappingLock to
+ * initialize it, but the WAL writer tries to do that ahead of insertions
+ * to avoid that from happening in the critical path.
*
*----------
*/
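For context, AdvanceXLInsertBuffer() is reached in two ways; the calls below are a sketch assuming the usual call sites (GetXLogBuffer() for inserters, XLogBackgroundFlush() for the WAL writer):

	/* WAL writer: pre-initialize whatever it can without forcing a flush */
	AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);

	/*
	 * Inserter whose target page is not mapped yet: initialize up to 'ptr',
	 * writing out old buffer contents first if necessary.
	 */
	AdvanceXLInsertBuffer(ptr, tli, false);
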
@@ -2005,79 +1994,32 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
XLogRecPtr NewPageBeginPtr;
XLogPageHeader NewPage;
- XLogRecPtr ReservedPtr;
int npages pg_attribute_unused() = 0;
- /*
- * We must run the loop below inside the critical section as we expect
- * XLogCtl->InitializedUpTo to eventually keep up. The most of callers
- * already run inside the critical section. Except for WAL writer, which
- * passed 'opportunistic == true', and therefore we don't perform
- * operations that could error out.
- *
- * Start an explicit critical section anyway though.
- */
- Assert(CritSectionCount > 0 || opportunistic);
- START_CRIT_SECTION();
+ LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
- /*--
- * Loop till we get all the pages in WAL buffer before 'upto' reserved for
- * initialization. Multiple process can initialize different buffers with
- * this loop in parallel as following.
- *
- * 1. Reserve page for initialization using XLogCtl->InitializeReserved.
- * 2. Initialize the reserved page.
- * 3. Attempt to advance XLogCtl->InitializedUpTo,
+ /*
+ * Now that we have the lock, check if someone initialized the page
+ * already.
*/
- ReservedPtr = pg_atomic_read_u64(&XLogCtl->InitializeReserved);
- while (upto >= ReservedPtr || opportunistic)
+ while (upto >= XLogCtl->InitializedUpTo || opportunistic)
{
- Assert(ReservedPtr % XLOG_BLCKSZ == 0);
+ nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);
/*
- * Get ending-offset of the buffer page we need to replace.
- *
- * We don't lookup into xlblocks, but rather calculate position we
- * must wait to be written. If it was written, xlblocks will have this
- * position (or uninitialized)
+ * Get ending-offset of the buffer page we need to replace (this may
+ * be zero if the buffer hasn't been used yet). Fall through if it's
+ * already written out.
*/
- if (ReservedPtr + XLOG_BLCKSZ > XLogCtl->InitializedFrom + XLOG_BLCKSZ * XLOGbuffers)
- OldPageRqstPtr = ReservedPtr + XLOG_BLCKSZ - (XLogRecPtr) XLOG_BLCKSZ * XLOGbuffers;
- else
- OldPageRqstPtr = InvalidXLogRecPtr;
-
- if (LogwrtResult.Write < OldPageRqstPtr && opportunistic)
+ OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
+ if (LogwrtResult.Write < OldPageRqstPtr)
{
/*
- * If we just want to pre-initialize as much as we can without
- * flushing, give up now.
+ * Nope, got work to do. If we just want to pre-initialize as much
+ * as we can without flushing, give up now.
*/
- upto = ReservedPtr - 1;
- break;
- }
-
- /*
- * Attempt to reserve the page for initialization. Failure means that
- * this page got reserved by another process.
- */
- if (!pg_atomic_compare_exchange_u64(&XLogCtl->InitializeReserved,
- &ReservedPtr,
- ReservedPtr + XLOG_BLCKSZ))
- continue;
-
- /*
- * Wait till page gets correctly initialized up to OldPageRqstPtr.
- */
- nextidx = XLogRecPtrToBufIdx(ReservedPtr);
- while (pg_atomic_read_u64(&XLogCtl->InitializedUpTo) < OldPageRqstPtr)
- ConditionVariableSleep(&XLogCtl->InitializedUpToCondVar, WAIT_EVENT_WAL_BUFFER_INIT);
- ConditionVariableCancelSleep();
- Assert(pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]) == OldPageRqstPtr);
-
- /* Fall through if it's already written out. */
- if (LogwrtResult.Write < OldPageRqstPtr)
- {
- /* Nope, got work to do. */
+ if (opportunistic)
+ break;
/* Advance shared memory write request position */
SpinLockAcquire(&XLogCtl->info_lck);
@@ -2092,6 +2034,14 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
RefreshXLogWriteResult(LogwrtResult);
if (LogwrtResult.Write < OldPageRqstPtr)
{
+ /*
+ * Must acquire write lock. Release WALBufMappingLock first,
+ * to make sure that all insertions that we need to wait for
+ * can finish (up to this same position). Otherwise we risk
+ * deadlock.
+ */
+ LWLockRelease(WALBufMappingLock);
+
WaitXLogInsertionsToFinish(OldPageRqstPtr);
LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
@@ -2119,6 +2069,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
*/
pgstat_report_fixed = true;
}
+ /* Re-acquire WALBufMappingLock and retry */
+ LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
+ continue;
}
}
@@ -2126,9 +2079,11 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
* Now the next buffer slot is free and we can set it up to be the
* next output page.
*/
- NewPageBeginPtr = ReservedPtr;
+ NewPageBeginPtr = XLogCtl->InitializedUpTo;
NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
+ Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
+
NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
/*
@@ -2192,100 +2147,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
*/
pg_write_barrier();
- /*-----
- * Update the value of XLogCtl->xlblocks[nextidx] and try to advance
- * XLogCtl->InitializedUpTo in a lock-less manner.
- *
- * First, let's provide a formal proof of the algorithm. Let it be 'n'
- * process with the following variables in shared memory:
- * f - an array of 'n' boolean flags,
- * v - atomic integer variable.
- *
- * Also, let
- * i - a number of a process,
- * j - local integer variable,
- * CAS(var, oldval, newval) - compare-and-swap atomic operation
- * returning true on success,
- * write_barrier()/read_barrier() - memory barriers.
- *
- * The pseudocode for each process is the following.
- *
- * j := i
- * f[i] := true
- * write_barrier()
- * while CAS(v, j, j + 1):
- * j := j + 1
- * read_barrier()
- * if not f[j]:
- * break
- *
- * Let's prove that v eventually reaches the value of n.
- * 1. Prove by contradiction. Assume v doesn't reach n and stucks
- * on k, where k < n.
- * 2. Process k attempts CAS(v, k, k + 1). 1). If, as we assumed, v
- * gets stuck at k, then this CAS operation must fail. Therefore,
- * v < k when process k attempts CAS(v, k, k + 1).
- * 3. If, as we assumed, v gets stuck at k, then the value k of v
- * must be achieved by some process m, where m < k. The process
- * m must observe f[k] == false. Otherwise, it will later attempt
- * CAS(v, k, k + 1) with success.
- * 4. Therefore, corresponding read_barrier() (while j == k) on
- * process m reached before write_barrier() of process k. But then
- * process k attempts CAS(v, k, k + 1) after process m successfully
- * incremented v to k, and that CAS operation must succeed.
- * That leads to a contradiction. So, there is no such k (k < n)
- * where v gets stuck. Q.E.D.
- *
- * To apply this proof to the code below, we assume
- * XLogCtl->InitializedUpTo will play the role of v with XLOG_BLCKSZ
- * granularity. We also assume setting XLogCtl->xlblocks[nextidx] to
- * NewPageEndPtr to play the role of setting f[i] to true. Also, note
- * that processes can't concurrently map different xlog locations to
- * the same nextidx because we previously requested that
- * XLogCtl->InitializedUpTo >= OldPageRqstPtr. So, a xlog buffer can
- * be taken for initialization only once the previous initialization
- * takes effect on XLogCtl->InitializedUpTo.
- */
-
pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
-
- pg_write_barrier();
-
- while (pg_atomic_compare_exchange_u64(&XLogCtl->InitializedUpTo, &NewPageBeginPtr, NewPageEndPtr))
- {
- NewPageBeginPtr = NewPageEndPtr;
- NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
- nextidx = XLogRecPtrToBufIdx(NewPageBeginPtr);
-
- pg_read_barrier();
-
- if (pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]) != NewPageEndPtr)
- {
- /*
- * Page at nextidx wasn't initialized yet, so we can't move
- * InitializedUpto further. It will be moved by backend which
- * will initialize nextidx.
- */
- ConditionVariableBroadcast(&XLogCtl->InitializedUpToCondVar);
- break;
- }
- }
+ XLogCtl->InitializedUpTo = NewPageEndPtr;
npages++;
}
-
- END_CRIT_SECTION();
-
- /*
- * All the pages in WAL buffer before 'upto' were reserved for
- * initialization. However, some pages might be reserved by concurrent
- * processes. Wait till they finish initialization.
- */
- while (upto >= pg_atomic_read_u64(&XLogCtl->InitializedUpTo))
- ConditionVariableSleep(&XLogCtl->InitializedUpToCondVar, WAIT_EVENT_WAL_BUFFER_INIT);
- ConditionVariableCancelSleep();
-
- pg_read_barrier();
+ LWLockRelease(WALBufMappingLock);
#ifdef WAL_DEBUG
if (XLOG_DEBUG && npages > 0)
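Taken together, the hunks above reduce AdvanceXLInsertBuffer() to a lock-then-check loop. A condensed sketch of the resulting control flow (write-request bookkeeping, page-header setup, and error paths elided):

	LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
	while (upto >= XLogCtl->InitializedUpTo || opportunistic)
	{
		nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);

		/* LSN up to which the buffer's old contents must already be on disk */
		OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
		if (LogwrtResult.Write < OldPageRqstPtr)
		{
			if (opportunistic)
				break;			/* pre-initializing only: never flush */

			/* release the mapping lock before waiting/writing, else deadlock */
			LWLockRelease(WALBufMappingLock);
			WaitXLogInsertionsToFinish(OldPageRqstPtr);
			/* ... XLogWrite() the old page under WALWriteLock ... */
			LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
			continue;			/* re-check everything under the lock */
		}

		/* buffer slot is free: initialize it and publish the new mapping */
		NewPageBeginPtr = XLogCtl->InitializedUpTo;
		NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
		/* ... set up the page header at XLogCtl->pages[nextidx] ... */
		pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
		XLogCtl->InitializedUpTo = NewPageEndPtr;
	}
	LWLockRelease(WALBufMappingLock);
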
@@ -5178,10 +5045,6 @@ XLOGShmemInit(void)
pg_atomic_init_u64(&XLogCtl->logWriteResult, InvalidXLogRecPtr);
pg_atomic_init_u64(&XLogCtl->logFlushResult, InvalidXLogRecPtr);
pg_atomic_init_u64(&XLogCtl->unloggedLSN, InvalidXLogRecPtr);
-
- pg_atomic_init_u64(&XLogCtl->InitializeReserved, InvalidXLogRecPtr);
- pg_atomic_init_u64(&XLogCtl->InitializedUpTo, InvalidXLogRecPtr);
- ConditionVariableInit(&XLogCtl->InitializedUpToCondVar);
}
/*
@@ -6205,8 +6068,7 @@ StartupXLOG(void)
memset(page + len, 0, XLOG_BLCKSZ - len);
pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
- pg_atomic_write_u64(&XLogCtl->InitializedUpTo, endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
- XLogCtl->InitializedFrom = endOfRecoveryInfo->lastPageBeginPtr;
+ XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
}
else
{
@@ -6215,10 +6077,8 @@ StartupXLOG(void)
* let the first attempt to insert a log record to initialize the next
* buffer.
*/
- pg_atomic_write_u64(&XLogCtl->InitializedUpTo, EndOfLog);
- XLogCtl->InitializedFrom = EndOfLog;
+ XLogCtl->InitializedUpTo = EndOfLog;
}
- pg_atomic_write_u64(&XLogCtl->InitializeReserved, pg_atomic_read_u64(&XLogCtl->InitializedUpTo));
/*
* Update local and shared status. This is OK to do without any locks
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 227df90f89c..fb784acf4af 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -586,7 +586,7 @@ RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Size
EstimatePendingSyncsSpace(void)
{
- long entries;
+ int64 entries;
entries = pendingSyncHash ? hash_get_num_entries(pendingSyncHash) : 0;
return mul_size(1 + entries, sizeof(RelFileLocator));
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 26d985193ae..c6de04819f1 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -126,7 +126,7 @@ static Oid findTypeSubscriptingFunction(List *procname, Oid typeOid);
static Oid findRangeSubOpclass(List *opcname, Oid subtype);
static Oid findRangeCanonicalFunction(List *procname, Oid typeOid);
static Oid findRangeSubtypeDiffFunction(List *procname, Oid subtype);
-static void validateDomainCheckConstraint(Oid domainoid, const char *ccbin);
+static void validateDomainCheckConstraint(Oid domainoid, const char *ccbin, LOCKMODE lockmode);
static void validateDomainNotNullConstraint(Oid domainoid);
static List *get_rels_with_domain(Oid domainOid, LOCKMODE lockmode);
static void checkEnumOwner(HeapTuple tup);
@@ -2986,7 +2986,7 @@ AlterDomainAddConstraint(List *names, Node *newConstraint,
* to.
*/
if (!constr->skip_validation)
- validateDomainCheckConstraint(domainoid, ccbin);
+ validateDomainCheckConstraint(domainoid, ccbin, ShareLock);
/*
* We must send out an sinval message for the domain, to ensure that
@@ -3098,7 +3098,12 @@ AlterDomainValidateConstraint(List *names, const char *constrName)
val = SysCacheGetAttrNotNull(CONSTROID, tuple, Anum_pg_constraint_conbin);
conbin = TextDatumGetCString(val);
- validateDomainCheckConstraint(domainoid, conbin);
+ /*
+ * Locking related relations with ShareUpdateExclusiveLock is ok because
+ * not-yet-valid constraints are still enforced against concurrent inserts
+ * or updates.
+ */
+ validateDomainCheckConstraint(domainoid, conbin, ShareUpdateExclusiveLock);
/*
* Now update the catalog, while we have the door open.
@@ -3191,9 +3196,16 @@ validateDomainNotNullConstraint(Oid domainoid)
/*
* Verify that all columns currently using the domain satisfy the given check
* constraint expression.
+ *
+ * It is used both when validating existing constraints and when adding newly
+ * created check constraints to a domain.
+ *
+ * The lockmode is used for relations using the domain. It should be
+ * ShareLock when adding a new constraint to the domain, and it can be
+ * ShareUpdateExclusiveLock when validating an existing constraint.
*/
static void
-validateDomainCheckConstraint(Oid domainoid, const char *ccbin)
+validateDomainCheckConstraint(Oid domainoid, const char *ccbin, LOCKMODE lockmode)
{
Expr *expr = (Expr *) stringToNode(ccbin);
List *rels;
@@ -3210,9 +3222,7 @@ validateDomainCheckConstraint(Oid domainoid, const char *ccbin)
exprstate = ExecPrepareExpr(expr, estate);
/* Fetch relation list with attributes based on this domain */
- /* ShareLock is sufficient to prevent concurrent data changes */
-
- rels = get_rels_with_domain(domainoid, ShareLock);
+ rels = get_rels_with_domain(domainoid, lockmode);
foreach(rt, rels)
{
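The rule stated in the new comment boils down to: take the stronger lock only when the constraint did not exist before. A trivial sketch of that decision (hypothetical helper, not part of the patch):

static LOCKMODE
domain_validation_lockmode(bool constraint_is_new)
{
	/*
	 * A pre-existing NOT VALID constraint is already enforced against
	 * concurrent inserts and updates, so ShareUpdateExclusiveLock is enough;
	 * a brand-new constraint needs ShareLock to keep concurrent writers out.
	 */
	return constraint_is_new ? ShareLock : ShareUpdateExclusiveLock;
}
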
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index d12a3ca0684..a0770e86796 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -330,8 +330,8 @@ InitShmemIndex(void)
*/
HTAB *
ShmemInitHash(const char *name, /* table string name for shmem index */
- long init_size, /* initial table size */
- long max_size, /* max size of the table */
+ int64 init_size, /* initial table size */
+ int64 max_size, /* max size of the table */
HASHCTL *infoP, /* info about key and bucket size */
int hash_flags) /* info about infoP */
{
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index f8c88147160..233b85b623d 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -443,7 +443,7 @@ void
LockManagerShmemInit(void)
{
HASHCTL info;
- long init_table_size,
+ int64 init_table_size,
max_table_size;
bool found;
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index c07fb588355..c1d8511ad17 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -1145,7 +1145,7 @@ void
PredicateLockShmemInit(void)
{
HASHCTL info;
- long max_table_size;
+ int64 max_table_size;
Size requestSize;
bool found;
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 0be307d2ca0..5427da5bc1b 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -156,7 +156,6 @@ REPLICATION_SLOT_DROP "Waiting for a replication slot to become inactive so it c
RESTORE_COMMAND "Waiting for <xref linkend="guc-restore-command"/> to complete."
SAFE_SNAPSHOT "Waiting to obtain a valid snapshot for a <literal>READ ONLY DEFERRABLE</literal> transaction."
SYNC_REP "Waiting for confirmation from a remote server during synchronous replication."
-WAL_BUFFER_INIT "Waiting on WAL buffer to be initialized."
WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit."
WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication."
WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated."
@@ -316,6 +315,7 @@ XidGen "Waiting to allocate a new transaction ID."
ProcArray "Waiting to access the shared per-process data structures (typically, to get a snapshot or report a session's transaction ID)."
SInvalRead "Waiting to retrieve messages from the shared catalog invalidation queue."
SInvalWrite "Waiting to add a message to the shared catalog invalidation queue."
+WALBufMapping "Waiting to replace a page in WAL buffers."
WALWrite "Waiting for WAL buffers to be written to disk."
ControlFile "Waiting to read or update the <filename>pg_control</filename> file or create a new WAL file."
MultiXactGen "Waiting to read or update shared multixact state."
diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index 25865b660ef..3a059f4ace0 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -938,6 +938,9 @@ pg_relation_filenode(PG_FUNCTION_ARGS)
*
* We don't fail but return NULL if we cannot find a mapping.
*
+ * Temporary relations are not detected; NULL is returned for them (see
+ * RelidByRelfilenumber() for the reasons).
+ *
* InvalidOid can be passed instead of the current database's default
* tablespace.
*/
diff --git a/src/backend/utils/cache/relfilenumbermap.c b/src/backend/utils/cache/relfilenumbermap.c
index 8a2f6f8c693..0b6f9cf3fa1 100644
--- a/src/backend/utils/cache/relfilenumbermap.c
+++ b/src/backend/utils/cache/relfilenumbermap.c
@@ -130,6 +130,11 @@ InitializeRelfilenumberMap(void)
* Map a relation's (tablespace, relfilenumber) to a relation's oid and cache
* the result.
*
+ * A temporary relation may share its relfilenumber with a permanent relation
+ * or with temporary relations created in other backends. Uniquely identifying
+ * a temporary relation would require the owning backend's proc number, which
+ * is not available here. Hence, this function ignores temporary relations.
+ *
* Returns InvalidOid if no relation matching the criteria could be found.
*/
Oid
@@ -208,6 +213,9 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber)
{
Form_pg_class classform = (Form_pg_class) GETSTRUCT(ntp);
+ if (classform->relpersistence == RELPERSISTENCE_TEMP)
+ continue;
+
if (found)
elog(ERROR,
"unexpected duplicate for tablespace %u, relfilenumber %u",
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index a7094917c20..1aeee5be42a 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -154,7 +154,7 @@ typedef HASHBUCKET *HASHSEGMENT;
typedef struct
{
slock_t mutex; /* spinlock for this freelist */
- long nentries; /* number of entries in associated buckets */
+ int64 nentries; /* number of entries in associated buckets */
HASHELEMENT *freeList; /* chain of free elements */
} FreeListData;
@@ -182,8 +182,8 @@ struct HASHHDR
/* These fields can change, but not in a partitioned table */
/* Also, dsize can't change in a shared table, even if unpartitioned */
- long dsize; /* directory size */
- long nsegs; /* number of allocated segments (<= dsize) */
+ int64 dsize; /* directory size */
+ int64 nsegs; /* number of allocated segments (<= dsize) */
uint32 max_bucket; /* ID of maximum bucket in use */
uint32 high_mask; /* mask to modulo into entire table */
uint32 low_mask; /* mask to modulo into lower half of table */
@@ -191,9 +191,9 @@ struct HASHHDR
/* These fields are fixed at hashtable creation */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
- long num_partitions; /* # partitions (must be power of 2), or 0 */
- long max_dsize; /* 'dsize' limit if directory is fixed size */
- long ssize; /* segment size --- must be power of 2 */
+ int64 num_partitions; /* # partitions (must be power of 2), or 0 */
+ int64 max_dsize; /* 'dsize' limit if directory is fixed size */
+ int64 ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
int nelem_alloc; /* number of entries to allocate at once */
bool isfixed; /* if true, don't enlarge */
@@ -236,7 +236,7 @@ struct HTAB
/* We keep local copies of these fixed values to reduce contention */
Size keysize; /* hash key length in bytes */
- long ssize; /* segment size --- must be power of 2 */
+ int64 ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
/*
@@ -277,12 +277,12 @@ static bool expand_table(HTAB *hashp);
static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
static void hdefault(HTAB *hashp);
static int choose_nelem_alloc(Size entrysize);
-static bool init_htab(HTAB *hashp, long nelem);
+static bool init_htab(HTAB *hashp, int64 nelem);
pg_noreturn static void hash_corrupted(HTAB *hashp);
static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue,
HASHBUCKET **bucketptr);
-static long next_pow2_long(long num);
-static int next_pow2_int(long num);
+static int64 next_pow2_int64(int64 num);
+static int next_pow2_int(int64 num);
static void register_seq_scan(HTAB *hashp);
static void deregister_seq_scan(HTAB *hashp);
static bool has_seq_scans(HTAB *hashp);
@@ -355,7 +355,7 @@ string_compare(const char *key1, const char *key2, Size keysize)
* large nelem will penalize hash_seq_search speed without buying much.
*/
HTAB *
-hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
+hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
{
HTAB *hashp;
HASHHDR *hctl;
@@ -697,7 +697,7 @@ choose_nelem_alloc(Size entrysize)
* arrays
*/
static bool
-init_htab(HTAB *hashp, long nelem)
+init_htab(HTAB *hashp, int64 nelem)
{
HASHHDR *hctl = hashp->hctl;
HASHSEGMENT *segp;
@@ -780,10 +780,10 @@ init_htab(HTAB *hashp, long nelem)
* NB: assumes that all hash structure parameters have default values!
*/
Size
-hash_estimate_size(long num_entries, Size entrysize)
+hash_estimate_size(int64 num_entries, Size entrysize)
{
Size size;
- long nBuckets,
+ int64 nBuckets,
nSegments,
nDirEntries,
nElementAllocs,
@@ -791,9 +791,9 @@ hash_estimate_size(long num_entries, Size entrysize)
elementAllocCnt;
/* estimate number of buckets wanted */
- nBuckets = next_pow2_long(num_entries);
+ nBuckets = next_pow2_int64(num_entries);
/* # of segments needed for nBuckets */
- nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
+ nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1);
/* directory entries */
nDirEntries = DEF_DIRSIZE;
while (nDirEntries < nSegments)
@@ -826,17 +826,17 @@ hash_estimate_size(long num_entries, Size entrysize)
*
* XXX this had better agree with the behavior of init_htab()...
*/
-long
-hash_select_dirsize(long num_entries)
+int64
+hash_select_dirsize(int64 num_entries)
{
- long nBuckets,
+ int64 nBuckets,
nSegments,
nDirEntries;
/* estimate number of buckets wanted */
- nBuckets = next_pow2_long(num_entries);
+ nBuckets = next_pow2_int64(num_entries);
/* # of segments needed for nBuckets */
- nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
+ nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1);
/* directory entries */
nDirEntries = DEF_DIRSIZE;
while (nDirEntries < nSegments)
@@ -887,7 +887,7 @@ hash_stats(const char *caller, HTAB *hashp)
HASHHDR *hctl = hashp->hctl;
elog(DEBUG4,
- "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: %ld Key Size: %zu Max Bucket: %u Segment Count: %ld",
+ "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: " INT64_FORMAT " Key Size: %zu Max Bucket: %u Segment Count: " INT64_FORMAT,
caller != NULL ? caller : "(unknown)", hashp->tabname, hctl->accesses,
hctl->collisions, hctl->expansions, hash_get_num_entries(hashp),
hctl->keysize, hctl->max_bucket, hctl->nsegs);
@@ -993,7 +993,7 @@ hash_search_with_hash_value(HTAB *hashp,
* Can't split if running in partitioned mode, nor if frozen, nor if
* table is the subject of any active hash_seq_search scans.
*/
- if (hctl->freeList[0].nentries > (long) hctl->max_bucket &&
+ if (hctl->freeList[0].nentries > (int64) hctl->max_bucket &&
!IS_PARTITIONED(hctl) && !hashp->frozen &&
!has_seq_scans(hashp))
(void) expand_table(hashp);
@@ -1332,11 +1332,11 @@ get_hash_entry(HTAB *hashp, int freelist_idx)
/*
* hash_get_num_entries -- get the number of entries in a hashtable
*/
-long
+int64
hash_get_num_entries(HTAB *hashp)
{
int i;
- long sum = hashp->hctl->freeList[0].nentries;
+ int64 sum = hashp->hctl->freeList[0].nentries;
/*
* We currently don't bother with acquiring the mutexes; it's only
@@ -1417,9 +1417,9 @@ hash_seq_search(HASH_SEQ_STATUS *status)
HTAB *hashp;
HASHHDR *hctl;
uint32 max_bucket;
- long ssize;
- long segment_num;
- long segment_ndx;
+ int64 ssize;
+ int64 segment_num;
+ int64 segment_ndx;
HASHSEGMENT segp;
uint32 curBucket;
HASHELEMENT *curElem;
@@ -1548,11 +1548,11 @@ expand_table(HTAB *hashp)
HASHHDR *hctl = hashp->hctl;
HASHSEGMENT old_seg,
new_seg;
- long old_bucket,
+ int64 old_bucket,
new_bucket;
- long new_segnum,
+ int64 new_segnum,
new_segndx;
- long old_segnum,
+ int64 old_segnum,
old_segndx;
HASHBUCKET *oldlink,
*newlink;
@@ -1620,7 +1620,7 @@ expand_table(HTAB *hashp)
currElement = nextElement)
{
nextElement = currElement->link;
- if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
+ if ((int64) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
{
*oldlink = currElement;
oldlink = &currElement->link;
@@ -1644,9 +1644,9 @@ dir_realloc(HTAB *hashp)
{
HASHSEGMENT *p;
HASHSEGMENT *old_p;
- long new_dsize;
- long old_dirsize;
- long new_dirsize;
+ int64 new_dsize;
+ int64 old_dirsize;
+ int64 new_dirsize;
if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
return false;
@@ -1780,8 +1780,8 @@ hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr)
{
HASHHDR *hctl = hashp->hctl;
HASHSEGMENT segp;
- long segment_num;
- long segment_ndx;
+ int64 segment_num;
+ int64 segment_ndx;
uint32 bucket;
bucket = calc_bucket(hctl, hashvalue);
@@ -1814,25 +1814,21 @@ hash_corrupted(HTAB *hashp)
/* calculate ceil(log base 2) of num */
int
-my_log2(long num)
+my_log2(int64 num)
{
/*
* guard against too-large input, which would be invalid for
* pg_ceil_log2_*()
*/
- if (num > LONG_MAX / 2)
- num = LONG_MAX / 2;
+ if (num > PG_INT64_MAX / 2)
+ num = PG_INT64_MAX / 2;
-#if SIZEOF_LONG < 8
- return pg_ceil_log2_32(num);
-#else
return pg_ceil_log2_64(num);
-#endif
}
-/* calculate first power of 2 >= num, bounded to what will fit in a long */
-static long
-next_pow2_long(long num)
+/* calculate first power of 2 >= num, bounded to what will fit in an int64 */
+static int64
+next_pow2_int64(int64 num)
{
/* my_log2's internal range check is sufficient */
return 1L << my_log2(num);
@@ -1840,7 +1836,7 @@ next_pow2_long(long num)
/* calculate first power of 2 >= num, bounded to what will fit in an int */
static int
-next_pow2_int(long num)
+next_pow2_int(int64 num)
{
if (num > INT_MAX / 2)
num = INT_MAX / 2;
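The clamp in my_log2() exists so that next_pow2_int64() can never shift past the top bit of a signed 64-bit value. A tiny illustration of the arithmetic (worst-case values in the comments):

static int64
clamped_pow2_demo(void)
{
	int64		n = PG_INT64_MAX;	/* absurdly large nelem request */
	int			lg = my_log2(n);	/* clamped to ceil(log2(PG_INT64_MAX / 2)) = 62 */

	return (int64) 1 << lg;			/* 2^62, still representable in int64 */
}
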
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index ea35f30f494..65561cc6bc3 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -271,12 +271,23 @@ Snapshot
GetTransactionSnapshot(void)
{
/*
- * This should not be called while doing logical decoding. Historic
- * snapshots are only usable for catalog access, not for general-purpose
- * queries.
+ * Return historic snapshot if doing logical decoding.
+ *
+ * Historic snapshots are only usable for catalog access, not for
+ * general-purpose queries. The caller is responsible for ensuring that
+ * the snapshot is used correctly! (PostgreSQL code never calls this
+ * during logical decoding, but extensions can do it.)
*/
if (HistoricSnapshotActive())
- elog(ERROR, "cannot take query snapshot during logical decoding");
+ {
+ /*
+ * We'll never need a non-historic transaction snapshot in this
+ * (sub-)transaction, so there's no need to be careful to set one up
+ * for later calls to GetTransactionSnapshot().
+ */
+ Assert(!FirstSnapshotSet);
+ return HistoricSnapshot;
+ }
/* First call in transaction? */
if (!FirstSnapshotSet)