-rw-r--r--  contrib/pg_stat_statements/pg_stat_statements.c |   4
-rw-r--r--  doc/src/sgml/func/func-admin.sgml               |   3
-rw-r--r--  doc/src/sgml/logicaldecoding.sgml               |   8
-rw-r--r--  doc/src/sgml/protocol.sgml                      |   2
-rw-r--r--  src/backend/access/heap/heapam.c                |  11
-rw-r--r--  src/backend/access/index/indexam.c              |  10
-rw-r--r--  src/backend/access/transam/xlog.c               | 234
-rw-r--r--  src/backend/catalog/storage.c                   |   2
-rw-r--r--  src/backend/commands/typecmds.c                 |  24
-rw-r--r--  src/backend/storage/ipc/shmem.c                 |   4
-rw-r--r--  src/backend/storage/lmgr/lock.c                 |   2
-rw-r--r--  src/backend/storage/lmgr/predicate.c            |   2
-rw-r--r--  src/backend/utils/activity/wait_event_names.txt |   2
-rw-r--r--  src/backend/utils/adt/dbsize.c                  |   3
-rw-r--r--  src/backend/utils/cache/relfilenumbermap.c      |   8
-rw-r--r--  src/backend/utils/hash/dynahash.c               |  92
-rw-r--r--  src/backend/utils/time/snapmgr.c                |  19
-rw-r--r--  src/bin/scripts/t/100_vacuumdb.pl               |   7
-rw-r--r--  src/bin/scripts/vacuumdb.c                      |   5
-rw-r--r--  src/include/storage/lwlocklist.h                |   2
-rw-r--r--  src/include/storage/shmem.h                     |   2
-rw-r--r--  src/include/utils/dynahash.h                    |   2
-rw-r--r--  src/include/utils/hsearch.h                     |  16
-rw-r--r--  src/include/utils/snapmgr.h                     |   3
-rw-r--r--  src/interfaces/libpq/fe-exec.c                  |  13
-rw-r--r--  src/interfaces/libpq/fe-protocol3.c             | 132
-rw-r--r--  src/interfaces/libpq/libpq-int.h                |   2
-rw-r--r--  src/test/regress/expected/alter_table.out       |   5
-rw-r--r--  src/test/regress/expected/create_table.out      |  12
-rw-r--r--  src/test/regress/sql/alter_table.sql            |   6
-rw-r--r--  src/test/regress/sql/create_table.sql           |   8
31 files changed, 329 insertions, 316 deletions
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 9fc9635d330..1cb368c8590 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2713,8 +2713,8 @@ entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
HASH_SEQ_STATUS hash_seq;
pgssEntry *entry;
FILE *qfile;
- long num_entries;
- long num_remove = 0;
+ int64 num_entries;
+ int64 num_remove = 0;
pgssHashKey key;
TimestampTz stats_reset;
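
The long -> int64 conversions in this hunk (and in the dynahash, shmem, lock and predicate hunks below) are needed because long is only 32 bits on 64-bit Windows (LLP64), while it is 64 bits on LP64 Unix-like platforms, so large entry counts could overflow there. A minimal standalone C sketch of the discrepancy (illustration only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* LP64 (most Unix-like 64-bit systems): sizeof(long) == 8 */
	/* LLP64 (64-bit Windows): sizeof(long) == 4 */
	printf("sizeof(long) = %zu, sizeof(int64_t) = %zu\n",
		   sizeof(long), sizeof(int64_t));

	/* An entry count that fits in int64_t but not in a 32-bit long */
	int64_t num_entries = (int64_t) 1 << 32;

	printf("num_entries = %lld\n", (long long) num_entries);
	return 0;
}
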
diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml
index 6347fe60b0c..57ff333159f 100644
--- a/doc/src/sgml/func/func-admin.sgml
+++ b/doc/src/sgml/func/func-admin.sgml
@@ -1834,7 +1834,8 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
<function>pg_relation_filepath</function>. For a relation in the
database's default tablespace, the tablespace can be specified as zero.
Returns <literal>NULL</literal> if no relation in the current database
- is associated with the given values.
+ is associated with the given values, or if the values refer to a
+ temporary relation.
</para></entry>
</row>
</tbody>
diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml
index a1f2efb2420..b803a819cf1 100644
--- a/doc/src/sgml/logicaldecoding.sgml
+++ b/doc/src/sgml/logicaldecoding.sgml
@@ -420,10 +420,10 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU
<para>
When slot synchronization is configured as recommended,
and the initial synchronization is performed either automatically or
- manually via pg_sync_replication_slot, the standby can persist the
- synchronized slot only if the following condition is met: The logical
- replication slot on the primary must retain WALs and system catalog
- rows that are still available on the standby. This ensures data
+ manually via <function>pg_sync_replication_slots</function>, the standby
+ can persist the synchronized slot only if the following condition is met:
+ The logical replication slot on the primary must retain WALs and system
+ catalog rows that are still available on the standby. This ensures data
integrity and allows logical replication to continue smoothly after
promotion.
If the required WALs or catalog rows have already been purged from the
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index e63647c093b..b5395604fb8 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -4136,7 +4136,7 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;"
message, indicated by the length field.
</para>
<para>
- The maximum key length is 256 bytes. The
+ The minimum and maximum key lengths are 4 and 256 bytes, respectively. The
<productname>PostgreSQL</productname> server only sends keys up to
32 bytes, but the larger maximum size allows for future server
versions, as well as connection poolers and other middleware, to use
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 0dcd6ee817e..7491cc3cb93 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1143,6 +1143,17 @@ heap_beginscan(Relation relation, Snapshot snapshot,
if (!(snapshot && IsMVCCSnapshot(snapshot)))
scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
+ /* Check that a historic snapshot is not used for non-catalog tables */
+ if (snapshot &&
+ IsHistoricMVCCSnapshot(snapshot) &&
+ !RelationIsAccessibleInLogicalDecoding(relation))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot query non-catalog table \"%s\" during logical decoding",
+ RelationGetRelationName(relation))));
+ }
+
/*
* For seqscan and sample scans in a serializable transaction, acquire a
* predicate lock on the entire relation. This is required not only to
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 1a4f36fe0a9..86d11f4ec79 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -263,6 +263,16 @@ index_beginscan(Relation heapRelation,
Assert(snapshot != InvalidSnapshot);
+ /* Check that a historic snapshot is not used for non-catalog tables */
+ if (IsHistoricMVCCSnapshot(snapshot) &&
+ !RelationIsAccessibleInLogicalDecoding(heapRelation))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot query non-catalog table \"%s\" during logical decoding",
+ RelationGetRelationName(heapRelation))));
+ }
+
scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false);
/*
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e8909406686..7ffb2179151 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -303,6 +303,11 @@ static bool doPageWrites;
* so it's a plain spinlock. The other locks are held longer (potentially
* over I/O operations), so we use LWLocks for them. These locks are:
*
+ * WALBufMappingLock: must be held to replace a page in the WAL buffer cache.
+ * It is only held while initializing and changing the mapping. If the
+ * contents of the buffer being replaced haven't been written yet, the mapping
+ * lock is released while the write is done, and reacquired afterwards.
+ *
* WALWriteLock: must be held to write WAL buffers to disk (XLogWrite or
* XLogFlush).
*
@@ -468,37 +473,21 @@ typedef struct XLogCtlData
pg_atomic_uint64 logFlushResult; /* last byte + 1 flushed */
/*
- * First initialized page in the cache (first byte position).
- */
- XLogRecPtr InitializedFrom;
-
- /*
- * Latest reserved for initialization page in the cache (last byte
- * position + 1).
+ * Latest initialized page in the cache (last byte position + 1).
*
- * To change the identity of a buffer, you need to advance
- * InitializeReserved first. To change the identity of a buffer that's
+ * To change the identity of a buffer (and InitializedUpTo), you need to
+ * hold WALBufMappingLock. To change the identity of a buffer that's
* still dirty, the old page needs to be written out first, and for that
* you need WALWriteLock, and you need to ensure that there are no
* in-progress insertions to the page by calling
* WaitXLogInsertionsToFinish().
*/
- pg_atomic_uint64 InitializeReserved;
-
- /*
- * Latest initialized page in the cache (last byte position + 1).
- *
- * InitializedUpTo is updated after the buffer initialization. After
- * update, waiters got notification using InitializedUpToCondVar.
- */
- pg_atomic_uint64 InitializedUpTo;
- ConditionVariable InitializedUpToCondVar;
+ XLogRecPtr InitializedUpTo;
/*
* These values do not change after startup, although the pointed-to pages
- * and xlblocks values certainly do. xlblocks values are changed
- * lock-free according to the check for the xlog write position and are
- * accompanied by changes of InitializeReserved and InitializedUpTo.
+ * and xlblocks values certainly do. xlblocks values are protected by
+ * WALBufMappingLock.
*/
char *pages; /* buffers for unwritten XLOG pages */
pg_atomic_uint64 *xlblocks; /* 1st byte ptr-s + XLOG_BLCKSZ */
@@ -821,9 +810,9 @@ XLogInsertRecord(XLogRecData *rdata,
* fullPageWrites from changing until the insertion is finished.
*
* Step 2 can usually be done completely in parallel. If the required WAL
- * page is not initialized yet, you have to go through AdvanceXLInsertBuffer,
- * which will ensure it is initialized. But the WAL writer tries to do that
- * ahead of insertions to avoid that from happening in the critical path.
+ * page is not initialized yet, you have to grab WALBufMappingLock to
+ * initialize it, but the WAL writer tries to do that ahead of insertions
+ * to avoid that from happening in the critical path.
*
*----------
*/
@@ -2005,79 +1994,32 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
XLogRecPtr NewPageBeginPtr;
XLogPageHeader NewPage;
- XLogRecPtr ReservedPtr;
int npages pg_attribute_unused() = 0;
- /*
- * We must run the loop below inside the critical section as we expect
- * XLogCtl->InitializedUpTo to eventually keep up. The most of callers
- * already run inside the critical section. Except for WAL writer, which
- * passed 'opportunistic == true', and therefore we don't perform
- * operations that could error out.
- *
- * Start an explicit critical section anyway though.
- */
- Assert(CritSectionCount > 0 || opportunistic);
- START_CRIT_SECTION();
+ LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
- /*--
- * Loop till we get all the pages in WAL buffer before 'upto' reserved for
- * initialization. Multiple process can initialize different buffers with
- * this loop in parallel as following.
- *
- * 1. Reserve page for initialization using XLogCtl->InitializeReserved.
- * 2. Initialize the reserved page.
- * 3. Attempt to advance XLogCtl->InitializedUpTo,
+ /*
+ * Now that we have the lock, check if someone initialized the page
+ * already.
*/
- ReservedPtr = pg_atomic_read_u64(&XLogCtl->InitializeReserved);
- while (upto >= ReservedPtr || opportunistic)
+ while (upto >= XLogCtl->InitializedUpTo || opportunistic)
{
- Assert(ReservedPtr % XLOG_BLCKSZ == 0);
+ nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);
/*
- * Get ending-offset of the buffer page we need to replace.
- *
- * We don't lookup into xlblocks, but rather calculate position we
- * must wait to be written. If it was written, xlblocks will have this
- * position (or uninitialized)
+ * Get ending-offset of the buffer page we need to replace (this may
+ * be zero if the buffer hasn't been used yet). Fall through if it's
+ * already written out.
*/
- if (ReservedPtr + XLOG_BLCKSZ > XLogCtl->InitializedFrom + XLOG_BLCKSZ * XLOGbuffers)
- OldPageRqstPtr = ReservedPtr + XLOG_BLCKSZ - (XLogRecPtr) XLOG_BLCKSZ * XLOGbuffers;
- else
- OldPageRqstPtr = InvalidXLogRecPtr;
-
- if (LogwrtResult.Write < OldPageRqstPtr && opportunistic)
+ OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
+ if (LogwrtResult.Write < OldPageRqstPtr)
{
/*
- * If we just want to pre-initialize as much as we can without
- * flushing, give up now.
+ * Nope, got work to do. If we just want to pre-initialize as much
+ * as we can without flushing, give up now.
*/
- upto = ReservedPtr - 1;
- break;
- }
-
- /*
- * Attempt to reserve the page for initialization. Failure means that
- * this page got reserved by another process.
- */
- if (!pg_atomic_compare_exchange_u64(&XLogCtl->InitializeReserved,
- &ReservedPtr,
- ReservedPtr + XLOG_BLCKSZ))
- continue;
-
- /*
- * Wait till page gets correctly initialized up to OldPageRqstPtr.
- */
- nextidx = XLogRecPtrToBufIdx(ReservedPtr);
- while (pg_atomic_read_u64(&XLogCtl->InitializedUpTo) < OldPageRqstPtr)
- ConditionVariableSleep(&XLogCtl->InitializedUpToCondVar, WAIT_EVENT_WAL_BUFFER_INIT);
- ConditionVariableCancelSleep();
- Assert(pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]) == OldPageRqstPtr);
-
- /* Fall through if it's already written out. */
- if (LogwrtResult.Write < OldPageRqstPtr)
- {
- /* Nope, got work to do. */
+ if (opportunistic)
+ break;
/* Advance shared memory write request position */
SpinLockAcquire(&XLogCtl->info_lck);
@@ -2092,6 +2034,14 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
RefreshXLogWriteResult(LogwrtResult);
if (LogwrtResult.Write < OldPageRqstPtr)
{
+ /*
+ * Must acquire write lock. Release WALBufMappingLock first,
+ * to make sure that all insertions that we need to wait for
+ * can finish (up to this same position). Otherwise we risk
+ * deadlock.
+ */
+ LWLockRelease(WALBufMappingLock);
+
WaitXLogInsertionsToFinish(OldPageRqstPtr);
LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
@@ -2119,6 +2069,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
*/
pgstat_report_fixed = true;
}
+ /* Re-acquire WALBufMappingLock and retry */
+ LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
+ continue;
}
}
@@ -2126,9 +2079,11 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
* Now the next buffer slot is free and we can set it up to be the
* next output page.
*/
- NewPageBeginPtr = ReservedPtr;
+ NewPageBeginPtr = XLogCtl->InitializedUpTo;
NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
+ Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
+
NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
/*
@@ -2192,100 +2147,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
*/
pg_write_barrier();
- /*-----
- * Update the value of XLogCtl->xlblocks[nextidx] and try to advance
- * XLogCtl->InitializedUpTo in a lock-less manner.
- *
- * First, let's provide a formal proof of the algorithm. Let it be 'n'
- * process with the following variables in shared memory:
- * f - an array of 'n' boolean flags,
- * v - atomic integer variable.
- *
- * Also, let
- * i - a number of a process,
- * j - local integer variable,
- * CAS(var, oldval, newval) - compare-and-swap atomic operation
- * returning true on success,
- * write_barrier()/read_barrier() - memory barriers.
- *
- * The pseudocode for each process is the following.
- *
- * j := i
- * f[i] := true
- * write_barrier()
- * while CAS(v, j, j + 1):
- * j := j + 1
- * read_barrier()
- * if not f[j]:
- * break
- *
- * Let's prove that v eventually reaches the value of n.
- * 1. Prove by contradiction. Assume v doesn't reach n and stucks
- * on k, where k < n.
- * 2. Process k attempts CAS(v, k, k + 1). 1). If, as we assumed, v
- * gets stuck at k, then this CAS operation must fail. Therefore,
- * v < k when process k attempts CAS(v, k, k + 1).
- * 3. If, as we assumed, v gets stuck at k, then the value k of v
- * must be achieved by some process m, where m < k. The process
- * m must observe f[k] == false. Otherwise, it will later attempt
- * CAS(v, k, k + 1) with success.
- * 4. Therefore, corresponding read_barrier() (while j == k) on
- * process m reached before write_barrier() of process k. But then
- * process k attempts CAS(v, k, k + 1) after process m successfully
- * incremented v to k, and that CAS operation must succeed.
- * That leads to a contradiction. So, there is no such k (k < n)
- * where v gets stuck. Q.E.D.
- *
- * To apply this proof to the code below, we assume
- * XLogCtl->InitializedUpTo will play the role of v with XLOG_BLCKSZ
- * granularity. We also assume setting XLogCtl->xlblocks[nextidx] to
- * NewPageEndPtr to play the role of setting f[i] to true. Also, note
- * that processes can't concurrently map different xlog locations to
- * the same nextidx because we previously requested that
- * XLogCtl->InitializedUpTo >= OldPageRqstPtr. So, a xlog buffer can
- * be taken for initialization only once the previous initialization
- * takes effect on XLogCtl->InitializedUpTo.
- */
-
pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
-
- pg_write_barrier();
-
- while (pg_atomic_compare_exchange_u64(&XLogCtl->InitializedUpTo, &NewPageBeginPtr, NewPageEndPtr))
- {
- NewPageBeginPtr = NewPageEndPtr;
- NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
- nextidx = XLogRecPtrToBufIdx(NewPageBeginPtr);
-
- pg_read_barrier();
-
- if (pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]) != NewPageEndPtr)
- {
- /*
- * Page at nextidx wasn't initialized yet, so we can't move
- * InitializedUpto further. It will be moved by backend which
- * will initialize nextidx.
- */
- ConditionVariableBroadcast(&XLogCtl->InitializedUpToCondVar);
- break;
- }
- }
+ XLogCtl->InitializedUpTo = NewPageEndPtr;
npages++;
}
-
- END_CRIT_SECTION();
-
- /*
- * All the pages in WAL buffer before 'upto' were reserved for
- * initialization. However, some pages might be reserved by concurrent
- * processes. Wait till they finish initialization.
- */
- while (upto >= pg_atomic_read_u64(&XLogCtl->InitializedUpTo))
- ConditionVariableSleep(&XLogCtl->InitializedUpToCondVar, WAIT_EVENT_WAL_BUFFER_INIT);
- ConditionVariableCancelSleep();
-
- pg_read_barrier();
+ LWLockRelease(WALBufMappingLock);
#ifdef WAL_DEBUG
if (XLOG_DEBUG && npages > 0)
@@ -5178,10 +5045,6 @@ XLOGShmemInit(void)
pg_atomic_init_u64(&XLogCtl->logWriteResult, InvalidXLogRecPtr);
pg_atomic_init_u64(&XLogCtl->logFlushResult, InvalidXLogRecPtr);
pg_atomic_init_u64(&XLogCtl->unloggedLSN, InvalidXLogRecPtr);
-
- pg_atomic_init_u64(&XLogCtl->InitializeReserved, InvalidXLogRecPtr);
- pg_atomic_init_u64(&XLogCtl->InitializedUpTo, InvalidXLogRecPtr);
- ConditionVariableInit(&XLogCtl->InitializedUpToCondVar);
}
/*
@@ -6205,8 +6068,7 @@ StartupXLOG(void)
memset(page + len, 0, XLOG_BLCKSZ - len);
pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
- pg_atomic_write_u64(&XLogCtl->InitializedUpTo, endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
- XLogCtl->InitializedFrom = endOfRecoveryInfo->lastPageBeginPtr;
+ XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
}
else
{
@@ -6215,10 +6077,8 @@ StartupXLOG(void)
* let the first attempt to insert a log record to initialize the next
* buffer.
*/
- pg_atomic_write_u64(&XLogCtl->InitializedUpTo, EndOfLog);
- XLogCtl->InitializedFrom = EndOfLog;
+ XLogCtl->InitializedUpTo = EndOfLog;
}
- pg_atomic_write_u64(&XLogCtl->InitializeReserved, pg_atomic_read_u64(&XLogCtl->InitializedUpTo));
/*
* Update local and shared status. This is OK to do without any locks
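
To restate the restored protocol in isolation: the mapping lock is held while a buffer slot is given a new identity and InitializedUpTo advances, but is dropped around the write-out of a still-dirty old page, then reacquired and the loop retried. A simplified standalone pthreads sketch of that pattern (illustration only, not the xlog.c code; real initialization also zeroes and stamps the page):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define BLCKSZ		8192
#define NBUFFERS	4

static pthread_mutex_t buf_mapping_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t initialized_up_to;		/* last initialized byte + 1 */
static uint64_t write_result;			/* last written-out byte + 1 */
static uint64_t xlblocks[NBUFFERS];		/* end position mapped to each slot */

static void
write_out_up_to(uint64_t upto)
{
	/* stand-in for XLogWrite(); runs without the mapping lock held */
	if (write_result < upto)
		write_result = upto;
}

static void
advance_insert_buffer(uint64_t upto)
{
	pthread_mutex_lock(&buf_mapping_lock);
	while (upto >= initialized_up_to)
	{
		int			idx = (int) ((initialized_up_to / BLCKSZ) % NBUFFERS);
		uint64_t	old_page_end = xlblocks[idx];

		if (write_result < old_page_end)
		{
			/* Old contents not written out yet: drop the lock, write, retry */
			pthread_mutex_unlock(&buf_mapping_lock);
			write_out_up_to(old_page_end);
			pthread_mutex_lock(&buf_mapping_lock);
			continue;
		}

		/* Slot is free: give it a new identity and publish it */
		xlblocks[idx] = initialized_up_to + BLCKSZ;
		initialized_up_to += BLCKSZ;
	}
	pthread_mutex_unlock(&buf_mapping_lock);
}

int
main(void)
{
	advance_insert_buffer(10 * BLCKSZ);
	printf("initialized up to %llu\n", (unsigned long long) initialized_up_to);
	return 0;
}
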
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 227df90f89c..fb784acf4af 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -586,7 +586,7 @@ RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Size
EstimatePendingSyncsSpace(void)
{
- long entries;
+ int64 entries;
entries = pendingSyncHash ? hash_get_num_entries(pendingSyncHash) : 0;
return mul_size(1 + entries, sizeof(RelFileLocator));
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 26d985193ae..c6de04819f1 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -126,7 +126,7 @@ static Oid findTypeSubscriptingFunction(List *procname, Oid typeOid);
static Oid findRangeSubOpclass(List *opcname, Oid subtype);
static Oid findRangeCanonicalFunction(List *procname, Oid typeOid);
static Oid findRangeSubtypeDiffFunction(List *procname, Oid subtype);
-static void validateDomainCheckConstraint(Oid domainoid, const char *ccbin);
+static void validateDomainCheckConstraint(Oid domainoid, const char *ccbin, LOCKMODE lockmode);
static void validateDomainNotNullConstraint(Oid domainoid);
static List *get_rels_with_domain(Oid domainOid, LOCKMODE lockmode);
static void checkEnumOwner(HeapTuple tup);
@@ -2986,7 +2986,7 @@ AlterDomainAddConstraint(List *names, Node *newConstraint,
* to.
*/
if (!constr->skip_validation)
- validateDomainCheckConstraint(domainoid, ccbin);
+ validateDomainCheckConstraint(domainoid, ccbin, ShareLock);
/*
* We must send out an sinval message for the domain, to ensure that
@@ -3098,7 +3098,12 @@ AlterDomainValidateConstraint(List *names, const char *constrName)
val = SysCacheGetAttrNotNull(CONSTROID, tuple, Anum_pg_constraint_conbin);
conbin = TextDatumGetCString(val);
- validateDomainCheckConstraint(domainoid, conbin);
+ /*
+ * Locking related relations with ShareUpdateExclusiveLock is ok because
+ * not-yet-valid constraints are still enforced against concurrent inserts
+ * or updates.
+ */
+ validateDomainCheckConstraint(domainoid, conbin, ShareUpdateExclusiveLock);
/*
* Now update the catalog, while we have the door open.
@@ -3191,9 +3196,16 @@ validateDomainNotNullConstraint(Oid domainoid)
/*
* Verify that all columns currently using the domain satisfy the given check
* constraint expression.
+ *
+ * This is used both to validate existing constraints and to add newly
+ * created check constraints to a domain.
+ *
+ * The lockmode is used for relations using the domain. It should be
+ * ShareLock when adding a new constraint to a domain. It can be
+ * ShareUpdateExclusiveLock when validating an existing constraint.
*/
static void
-validateDomainCheckConstraint(Oid domainoid, const char *ccbin)
+validateDomainCheckConstraint(Oid domainoid, const char *ccbin, LOCKMODE lockmode)
{
Expr *expr = (Expr *) stringToNode(ccbin);
List *rels;
@@ -3210,9 +3222,7 @@ validateDomainCheckConstraint(Oid domainoid, const char *ccbin)
exprstate = ExecPrepareExpr(expr, estate);
/* Fetch relation list with attributes based on this domain */
- /* ShareLock is sufficient to prevent concurrent data changes */
-
- rels = get_rels_with_domain(domainoid, ShareLock);
+ rels = get_rels_with_domain(domainoid, lockmode);
foreach(rt, rels)
{
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index d12a3ca0684..a0770e86796 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -330,8 +330,8 @@ InitShmemIndex(void)
*/
HTAB *
ShmemInitHash(const char *name, /* table string name for shmem index */
- long init_size, /* initial table size */
- long max_size, /* max size of the table */
+ int64 init_size, /* initial table size */
+ int64 max_size, /* max size of the table */
HASHCTL *infoP, /* info about key and bucket size */
int hash_flags) /* info about infoP */
{
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index f8c88147160..233b85b623d 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -443,7 +443,7 @@ void
LockManagerShmemInit(void)
{
HASHCTL info;
- long init_table_size,
+ int64 init_table_size,
max_table_size;
bool found;
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index c07fb588355..c1d8511ad17 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -1145,7 +1145,7 @@ void
PredicateLockShmemInit(void)
{
HASHCTL info;
- long max_table_size;
+ int64 max_table_size;
Size requestSize;
bool found;
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 0be307d2ca0..5427da5bc1b 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -156,7 +156,6 @@ REPLICATION_SLOT_DROP "Waiting for a replication slot to become inactive so it c
RESTORE_COMMAND "Waiting for <xref linkend="guc-restore-command"/> to complete."
SAFE_SNAPSHOT "Waiting to obtain a valid snapshot for a <literal>READ ONLY DEFERRABLE</literal> transaction."
SYNC_REP "Waiting for confirmation from a remote server during synchronous replication."
-WAL_BUFFER_INIT "Waiting on WAL buffer to be initialized."
WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit."
WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication."
WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated."
@@ -316,6 +315,7 @@ XidGen "Waiting to allocate a new transaction ID."
ProcArray "Waiting to access the shared per-process data structures (typically, to get a snapshot or report a session's transaction ID)."
SInvalRead "Waiting to retrieve messages from the shared catalog invalidation queue."
SInvalWrite "Waiting to add a message to the shared catalog invalidation queue."
+WALBufMapping "Waiting to replace a page in WAL buffers."
WALWrite "Waiting for WAL buffers to be written to disk."
ControlFile "Waiting to read or update the <filename>pg_control</filename> file or create a new WAL file."
MultiXactGen "Waiting to read or update shared multixact state."
diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index 25865b660ef..3a059f4ace0 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -938,6 +938,9 @@ pg_relation_filenode(PG_FUNCTION_ARGS)
*
* We don't fail but return NULL if we cannot find a mapping.
*
+ * Temporary relations are not supported; NULL is returned for them (see
+ * RelidByRelfilenumber() for the reasons).
+ *
* InvalidOid can be passed instead of the current database's default
* tablespace.
*/
diff --git a/src/backend/utils/cache/relfilenumbermap.c b/src/backend/utils/cache/relfilenumbermap.c
index 8a2f6f8c693..0b6f9cf3fa1 100644
--- a/src/backend/utils/cache/relfilenumbermap.c
+++ b/src/backend/utils/cache/relfilenumbermap.c
@@ -130,6 +130,11 @@ InitializeRelfilenumberMap(void)
* Map a relation's (tablespace, relfilenumber) to a relation's oid and cache
* the result.
*
+ * A temporary relation may share its relfilenumber with a permanent relation
+ * or with temporary relations created in other backends. Uniquely identifying
+ * a temporary relation would require its creating backend's proc number,
+ * which is not available here. Hence, temporary relations are ignored.
+ *
* Returns InvalidOid if no relation matching the criteria could be found.
*/
Oid
@@ -208,6 +213,9 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber)
{
Form_pg_class classform = (Form_pg_class) GETSTRUCT(ntp);
+ if (classform->relpersistence == RELPERSISTENCE_TEMP)
+ continue;
+
if (found)
elog(ERROR,
"unexpected duplicate for tablespace %u, relfilenumber %u",
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index a7094917c20..1aeee5be42a 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -154,7 +154,7 @@ typedef HASHBUCKET *HASHSEGMENT;
typedef struct
{
slock_t mutex; /* spinlock for this freelist */
- long nentries; /* number of entries in associated buckets */
+ int64 nentries; /* number of entries in associated buckets */
HASHELEMENT *freeList; /* chain of free elements */
} FreeListData;
@@ -182,8 +182,8 @@ struct HASHHDR
/* These fields can change, but not in a partitioned table */
/* Also, dsize can't change in a shared table, even if unpartitioned */
- long dsize; /* directory size */
- long nsegs; /* number of allocated segments (<= dsize) */
+ int64 dsize; /* directory size */
+ int64 nsegs; /* number of allocated segments (<= dsize) */
uint32 max_bucket; /* ID of maximum bucket in use */
uint32 high_mask; /* mask to modulo into entire table */
uint32 low_mask; /* mask to modulo into lower half of table */
@@ -191,9 +191,9 @@ struct HASHHDR
/* These fields are fixed at hashtable creation */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
- long num_partitions; /* # partitions (must be power of 2), or 0 */
- long max_dsize; /* 'dsize' limit if directory is fixed size */
- long ssize; /* segment size --- must be power of 2 */
+ int64 num_partitions; /* # partitions (must be power of 2), or 0 */
+ int64 max_dsize; /* 'dsize' limit if directory is fixed size */
+ int64 ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
int nelem_alloc; /* number of entries to allocate at once */
bool isfixed; /* if true, don't enlarge */
@@ -236,7 +236,7 @@ struct HTAB
/* We keep local copies of these fixed values to reduce contention */
Size keysize; /* hash key length in bytes */
- long ssize; /* segment size --- must be power of 2 */
+ int64 ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
/*
@@ -277,12 +277,12 @@ static bool expand_table(HTAB *hashp);
static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
static void hdefault(HTAB *hashp);
static int choose_nelem_alloc(Size entrysize);
-static bool init_htab(HTAB *hashp, long nelem);
+static bool init_htab(HTAB *hashp, int64 nelem);
pg_noreturn static void hash_corrupted(HTAB *hashp);
static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue,
HASHBUCKET **bucketptr);
-static long next_pow2_long(long num);
-static int next_pow2_int(long num);
+static int64 next_pow2_int64(int64 num);
+static int next_pow2_int(int64 num);
static void register_seq_scan(HTAB *hashp);
static void deregister_seq_scan(HTAB *hashp);
static bool has_seq_scans(HTAB *hashp);
@@ -355,7 +355,7 @@ string_compare(const char *key1, const char *key2, Size keysize)
* large nelem will penalize hash_seq_search speed without buying much.
*/
HTAB *
-hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
+hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
{
HTAB *hashp;
HASHHDR *hctl;
@@ -697,7 +697,7 @@ choose_nelem_alloc(Size entrysize)
* arrays
*/
static bool
-init_htab(HTAB *hashp, long nelem)
+init_htab(HTAB *hashp, int64 nelem)
{
HASHHDR *hctl = hashp->hctl;
HASHSEGMENT *segp;
@@ -780,10 +780,10 @@ init_htab(HTAB *hashp, long nelem)
* NB: assumes that all hash structure parameters have default values!
*/
Size
-hash_estimate_size(long num_entries, Size entrysize)
+hash_estimate_size(int64 num_entries, Size entrysize)
{
Size size;
- long nBuckets,
+ int64 nBuckets,
nSegments,
nDirEntries,
nElementAllocs,
@@ -791,9 +791,9 @@ hash_estimate_size(long num_entries, Size entrysize)
elementAllocCnt;
/* estimate number of buckets wanted */
- nBuckets = next_pow2_long(num_entries);
+ nBuckets = next_pow2_int64(num_entries);
/* # of segments needed for nBuckets */
- nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
+ nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1);
/* directory entries */
nDirEntries = DEF_DIRSIZE;
while (nDirEntries < nSegments)
@@ -826,17 +826,17 @@ hash_estimate_size(long num_entries, Size entrysize)
*
* XXX this had better agree with the behavior of init_htab()...
*/
-long
-hash_select_dirsize(long num_entries)
+int64
+hash_select_dirsize(int64 num_entries)
{
- long nBuckets,
+ int64 nBuckets,
nSegments,
nDirEntries;
/* estimate number of buckets wanted */
- nBuckets = next_pow2_long(num_entries);
+ nBuckets = next_pow2_int64(num_entries);
/* # of segments needed for nBuckets */
- nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
+ nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1);
/* directory entries */
nDirEntries = DEF_DIRSIZE;
while (nDirEntries < nSegments)
@@ -887,7 +887,7 @@ hash_stats(const char *caller, HTAB *hashp)
HASHHDR *hctl = hashp->hctl;
elog(DEBUG4,
- "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: %ld Key Size: %zu Max Bucket: %u Segment Count: %ld",
+ "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: " INT64_FORMAT " Key Size: %zu Max Bucket: %u Segment Count: " INT64_FORMAT,
caller != NULL ? caller : "(unknown)", hashp->tabname, hctl->accesses,
hctl->collisions, hctl->expansions, hash_get_num_entries(hashp),
hctl->keysize, hctl->max_bucket, hctl->nsegs);
@@ -993,7 +993,7 @@ hash_search_with_hash_value(HTAB *hashp,
* Can't split if running in partitioned mode, nor if frozen, nor if
* table is the subject of any active hash_seq_search scans.
*/
- if (hctl->freeList[0].nentries > (long) hctl->max_bucket &&
+ if (hctl->freeList[0].nentries > (int64) hctl->max_bucket &&
!IS_PARTITIONED(hctl) && !hashp->frozen &&
!has_seq_scans(hashp))
(void) expand_table(hashp);
@@ -1332,11 +1332,11 @@ get_hash_entry(HTAB *hashp, int freelist_idx)
/*
* hash_get_num_entries -- get the number of entries in a hashtable
*/
-long
+int64
hash_get_num_entries(HTAB *hashp)
{
int i;
- long sum = hashp->hctl->freeList[0].nentries;
+ int64 sum = hashp->hctl->freeList[0].nentries;
/*
* We currently don't bother with acquiring the mutexes; it's only
@@ -1417,9 +1417,9 @@ hash_seq_search(HASH_SEQ_STATUS *status)
HTAB *hashp;
HASHHDR *hctl;
uint32 max_bucket;
- long ssize;
- long segment_num;
- long segment_ndx;
+ int64 ssize;
+ int64 segment_num;
+ int64 segment_ndx;
HASHSEGMENT segp;
uint32 curBucket;
HASHELEMENT *curElem;
@@ -1548,11 +1548,11 @@ expand_table(HTAB *hashp)
HASHHDR *hctl = hashp->hctl;
HASHSEGMENT old_seg,
new_seg;
- long old_bucket,
+ int64 old_bucket,
new_bucket;
- long new_segnum,
+ int64 new_segnum,
new_segndx;
- long old_segnum,
+ int64 old_segnum,
old_segndx;
HASHBUCKET *oldlink,
*newlink;
@@ -1620,7 +1620,7 @@ expand_table(HTAB *hashp)
currElement = nextElement)
{
nextElement = currElement->link;
- if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
+ if ((int64) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
{
*oldlink = currElement;
oldlink = &currElement->link;
@@ -1644,9 +1644,9 @@ dir_realloc(HTAB *hashp)
{
HASHSEGMENT *p;
HASHSEGMENT *old_p;
- long new_dsize;
- long old_dirsize;
- long new_dirsize;
+ int64 new_dsize;
+ int64 old_dirsize;
+ int64 new_dirsize;
if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
return false;
@@ -1780,8 +1780,8 @@ hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr)
{
HASHHDR *hctl = hashp->hctl;
HASHSEGMENT segp;
- long segment_num;
- long segment_ndx;
+ int64 segment_num;
+ int64 segment_ndx;
uint32 bucket;
bucket = calc_bucket(hctl, hashvalue);
@@ -1814,25 +1814,21 @@ hash_corrupted(HTAB *hashp)
/* calculate ceil(log base 2) of num */
int
-my_log2(long num)
+my_log2(int64 num)
{
/*
* guard against too-large input, which would be invalid for
* pg_ceil_log2_*()
*/
- if (num > LONG_MAX / 2)
- num = LONG_MAX / 2;
+ if (num > PG_INT64_MAX / 2)
+ num = PG_INT64_MAX / 2;
-#if SIZEOF_LONG < 8
- return pg_ceil_log2_32(num);
-#else
return pg_ceil_log2_64(num);
-#endif
}
-/* calculate first power of 2 >= num, bounded to what will fit in a long */
-static long
-next_pow2_long(long num)
+/* calculate first power of 2 >= num, bounded to what will fit in an int64 */
+static int64
+next_pow2_int64(int64 num)
{
/* my_log2's internal range check is sufficient */
-	return 1L << my_log2(num);
+	return INT64CONST(1) << my_log2(num);
@@ -1840,7 +1836,7 @@ next_pow2_long(long num)
/* calculate first power of 2 >= num, bounded to what will fit in an int */
static int
-next_pow2_int(long num)
+next_pow2_int(int64 num)
{
if (num > INT_MAX / 2)
num = INT_MAX / 2;
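
The my_log2()/next_pow2_int64() rework drops the SIZEOF_LONG branch because the argument is now always 64 bits wide. A standalone sketch of the same ceil-log2 and next-power-of-2 math (pg_ceil_log2_64() is approximated by a plain loop here; illustration only):

#include <stdint.h>
#include <stdio.h>

/* ceil(log2(num)) for num >= 1; a portable stand-in for pg_ceil_log2_64() */
static int
my_log2(int64_t num)
{
	int			i = 0;

	/* guard against too-large input, as in dynahash.c */
	if (num > INT64_MAX / 2)
		num = INT64_MAX / 2;
	while (((int64_t) 1 << i) < num)
		i++;
	return i;
}

/* first power of 2 >= num, bounded to what will fit in an int64 */
static int64_t
next_pow2_int64(int64_t num)
{
	/* my_log2's internal range check is sufficient */
	return (int64_t) 1 << my_log2(num);
}

int
main(void)
{
	printf("%lld %lld %lld\n",
		   (long long) next_pow2_int64(1000),	/* 1024 */
		   (long long) next_pow2_int64(1024),	/* 1024 */
		   (long long) next_pow2_int64((int64_t) 3000000000));	/* 4294967296 */
	return 0;
}
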
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index ea35f30f494..65561cc6bc3 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -271,12 +271,23 @@ Snapshot
GetTransactionSnapshot(void)
{
/*
- * This should not be called while doing logical decoding. Historic
- * snapshots are only usable for catalog access, not for general-purpose
- * queries.
+ * Return historic snapshot if doing logical decoding.
+ *
+ * Historic snapshots are only usable for catalog access, not for
+ * general-purpose queries. The caller is responsible for ensuring that
+ * the snapshot is used correctly! (PostgreSQL code never calls this
+ * during logical decoding, but extensions can do it.)
*/
if (HistoricSnapshotActive())
- elog(ERROR, "cannot take query snapshot during logical decoding");
+ {
+ /*
+ * We'll never need a non-historic transaction snapshot in this
+ * (sub-)transaction, so there's no need to be careful to set one up
+ * for later calls to GetTransactionSnapshot().
+ */
+ Assert(!FirstSnapshotSet);
+ return HistoricSnapshot;
+ }
/* First call in transaction? */
if (!FirstSnapshotSet)
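
What the snapmgr.c change enables, sketched from an extension's point of view: a callback running during logical decoding can now call GetTransactionSnapshot() and scan a catalog table under the returned historic snapshot, while a non-catalog table would trip the new checks in heapam.c and indexam.c. Backend-style code that only compiles as part of an extension; the helper is hypothetical:

#include "postgres.h"

#include "access/table.h"
#include "access/tableam.h"
#include "catalog/pg_class.h"
#include "executor/tuptable.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"

/*
 * Hypothetical output-plugin helper: count the pg_class rows visible to
 * the decoding snapshot.  GetTransactionSnapshot() now hands back the
 * historic snapshot here instead of raising an error.
 */
static int64
count_visible_pg_class_rows(void)
{
	Snapshot	snapshot = GetTransactionSnapshot();
	Relation	rel = table_open(RelationRelationId, AccessShareLock);
	TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);
	TupleTableSlot *slot = table_slot_create(rel, NULL);
	int64		nrows = 0;

	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
		nrows++;

	ExecDropSingleTupleTableSlot(slot);
	table_endscan(scan);
	table_close(rel, AccessShareLock);
	return nrows;
}
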
diff --git a/src/bin/scripts/t/100_vacuumdb.pl b/src/bin/scripts/t/100_vacuumdb.pl
index 240f0fdd3e5..945c30df156 100644
--- a/src/bin/scripts/t/100_vacuumdb.pl
+++ b/src/bin/scripts/t/100_vacuumdb.pl
@@ -237,9 +237,10 @@ $node->command_fails_like(
qr/cannot vacuum all databases and a specific one at the same time/,
'cannot use option --all and a dbname as argument at the same time');
-$node->safe_psql('postgres',
- 'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;'
-);
+$node->safe_psql('postgres', q|
+ CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;
+ ALTER TABLE regression_vacuumdb_test ADD COLUMN c INT GENERATED ALWAYS AS (a + b);
+|);
$node->issues_sql_like(
[
'vacuumdb', '--analyze-only',
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
index 22093e50aa5..fd236087e90 100644
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -14,6 +14,7 @@
#include <limits.h>
+#include "catalog/pg_attribute_d.h"
#include "catalog/pg_class_d.h"
#include "common.h"
#include "common/connect.h"
@@ -973,6 +974,8 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
" AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
" AND NOT a.attisdropped\n"
" AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+ " AND a.attgenerated OPERATOR(pg_catalog.<>) "
+ CppAsString2(ATTRIBUTE_GENERATED_VIRTUAL) "\n"
" AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
" WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
" AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
@@ -1010,6 +1013,8 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
" AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
" AND NOT a.attisdropped\n"
" AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+ " AND a.attgenerated OPERATOR(pg_catalog.<>) "
+ CppAsString2(ATTRIBUTE_GENERATED_VIRTUAL) "\n"
" AND c.relhassubclass\n"
" AND NOT p.inherited\n"
" AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n"
diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h
index 208d2e3a8ed..06a1ffd4b08 100644
--- a/src/include/storage/lwlocklist.h
+++ b/src/include/storage/lwlocklist.h
@@ -38,7 +38,7 @@ PG_LWLOCK(3, XidGen)
PG_LWLOCK(4, ProcArray)
PG_LWLOCK(5, SInvalRead)
PG_LWLOCK(6, SInvalWrite)
-/* 7 was WALBufMapping */
+PG_LWLOCK(7, WALBufMapping)
PG_LWLOCK(8, WALWrite)
PG_LWLOCK(9, ControlFile)
/* 10 was CheckpointLock */
diff --git a/src/include/storage/shmem.h b/src/include/storage/shmem.h
index c1f668ded95..8604feca93b 100644
--- a/src/include/storage/shmem.h
+++ b/src/include/storage/shmem.h
@@ -35,7 +35,7 @@ extern void *ShmemAllocNoError(Size size);
extern void *ShmemAllocUnlocked(Size size);
extern bool ShmemAddrIsValid(const void *addr);
extern void InitShmemIndex(void);
-extern HTAB *ShmemInitHash(const char *name, long init_size, long max_size,
+extern HTAB *ShmemInitHash(const char *name, int64 init_size, int64 max_size,
HASHCTL *infoP, int hash_flags);
extern void *ShmemInitStruct(const char *name, Size size, bool *foundPtr);
extern Size add_size(Size s1, Size s2);
diff --git a/src/include/utils/dynahash.h b/src/include/utils/dynahash.h
index 8a31d9524e2..a4362d3f65e 100644
--- a/src/include/utils/dynahash.h
+++ b/src/include/utils/dynahash.h
@@ -15,6 +15,6 @@
#ifndef DYNAHASH_H
#define DYNAHASH_H
-extern int my_log2(long num);
+extern int my_log2(int64 num);
#endif /* DYNAHASH_H */
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index 80deb8e543e..cb09a4cbe8c 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -65,12 +65,12 @@ typedef struct HTAB HTAB;
typedef struct HASHCTL
{
/* Used if HASH_PARTITION flag is set: */
- long num_partitions; /* # partitions (must be power of 2) */
+ int64 num_partitions; /* # partitions (must be power of 2) */
/* Used if HASH_SEGMENT flag is set: */
- long ssize; /* segment size */
+ int64 ssize; /* segment size */
/* Used if HASH_DIRSIZE flag is set: */
- long dsize; /* (initial) directory size */
- long max_dsize; /* limit to dsize if dir size is limited */
+ int64 dsize; /* (initial) directory size */
+ int64 max_dsize; /* limit to dsize if dir size is limited */
/* Used if HASH_ELEM flag is set (which is now required): */
Size keysize; /* hash key length in bytes */
Size entrysize; /* total user element size in bytes */
@@ -129,7 +129,7 @@ typedef struct
/*
* prototypes for functions in dynahash.c
*/
-extern HTAB *hash_create(const char *tabname, long nelem,
+extern HTAB *hash_create(const char *tabname, int64 nelem,
const HASHCTL *info, int flags);
extern void hash_destroy(HTAB *hashp);
extern void hash_stats(const char *caller, HTAB *hashp);
@@ -141,7 +141,7 @@ extern void *hash_search_with_hash_value(HTAB *hashp, const void *keyPtr,
bool *foundPtr);
extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry,
const void *newKeyPtr);
-extern long hash_get_num_entries(HTAB *hashp);
+extern int64 hash_get_num_entries(HTAB *hashp);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status,
HTAB *hashp,
@@ -149,8 +149,8 @@ extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status,
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
extern void hash_seq_term(HASH_SEQ_STATUS *status);
extern void hash_freeze(HTAB *hashp);
-extern Size hash_estimate_size(long num_entries, Size entrysize);
-extern long hash_select_dirsize(long num_entries);
+extern Size hash_estimate_size(int64 num_entries, Size entrysize);
+extern int64 hash_select_dirsize(int64 num_entries);
extern Size hash_get_shared_size(HASHCTL *info, int flags);
extern void AtEOXact_HashTables(bool isCommit);
extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);
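
The HASHCTL and hash_create() signature changes are source-compatible for typical callers, since int and long arguments promote cleanly to int64. A hedged sketch of ordinary usage against the new declarations (the table name and entry struct are made up for illustration):

#include "postgres.h"

#include "utils/hsearch.h"

typedef struct MyCacheEntry
{
	Oid			key;			/* hash key: must be first */
	int64		hits;
} MyCacheEntry;

static HTAB *
create_my_cache(int64 nelem)
{
	HASHCTL		ctl;

	ctl.keysize = sizeof(Oid);
	ctl.entrysize = sizeof(MyCacheEntry);

	/* nelem is now int64, matching hash_get_num_entries() and friends */
	return hash_create("my cache", nelem, &ctl, HASH_ELEM | HASH_BLOBS);
}
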
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index d346be71642..604c1f90216 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -56,6 +56,9 @@ extern PGDLLIMPORT SnapshotData SnapshotToastData;
((snapshot)->snapshot_type == SNAPSHOT_MVCC || \
(snapshot)->snapshot_type == SNAPSHOT_HISTORIC_MVCC)
+#define IsHistoricMVCCSnapshot(snapshot) \
+ ((snapshot)->snapshot_type == SNAPSHOT_HISTORIC_MVCC)
+
extern Snapshot GetTransactionSnapshot(void);
extern Snapshot GetLatestSnapshot(void);
extern void SnapshotSetCommandId(CommandId curcid);
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 4256ae5c0cc..0b1e37ec30b 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -1076,8 +1076,12 @@ pqSaveMessageField(PGresult *res, char code, const char *value)
/*
* pqSaveParameterStatus - remember parameter status sent by backend
+ *
+ * Returns 1 on success, 0 on out-of-memory. (Note that on out-of-memory, we
+ * have already released the old value of the parameter, if any. The only
+ * really safe way to recover is to terminate the connection.)
*/
-void
+int
pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
{
pgParameterStatus *pstatus;
@@ -1119,6 +1123,11 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
pstatus->next = conn->pstatus;
conn->pstatus = pstatus;
}
+ else
+ {
+ /* out of memory */
+ return 0;
+ }
/*
* Save values of settings that are of interest to libpq in fields of the
@@ -1190,6 +1199,8 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
{
conn->scram_sha_256_iterations = atoi(value);
}
+
+ return 1;
}
diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c
index 1599de757d1..43ad672abce 100644
--- a/src/interfaces/libpq/fe-protocol3.c
+++ b/src/interfaces/libpq/fe-protocol3.c
@@ -43,6 +43,7 @@
(id) == PqMsg_RowDescription)
+static void handleFatalError(PGconn *conn);
static void handleSyncLoss(PGconn *conn, char id, int msgLength);
static int getRowDescriptions(PGconn *conn, int msgLength);
static int getParamDescriptions(PGconn *conn, int msgLength);
@@ -120,12 +121,12 @@ pqParseInput3(PGconn *conn)
conn))
{
/*
- * XXX add some better recovery code... plan is to skip over
- * the message using its length, then report an error. For the
- * moment, just treat this like loss of sync (which indeed it
- * might be!)
+ * Abandon the connection. There's not much else we can
+ * safely do; we can't just ignore the message or we could
+ * miss important changes to the connection state.
+ * pqCheckInBufferSpace() already reported the error.
*/
- handleSyncLoss(conn, id, msgLength);
+ handleFatalError(conn);
}
return;
}
@@ -456,6 +457,11 @@ pqParseInput3(PGconn *conn)
/* Normal case: parsing agrees with specified length */
pqParseDone(conn, conn->inCursor);
}
+ else if (conn->error_result && conn->status == CONNECTION_BAD)
+ {
+ /* The connection was abandoned and we already reported it */
+ return;
+ }
else
{
/* Trouble --- report it */
@@ -470,15 +476,14 @@ pqParseInput3(PGconn *conn)
}
/*
- * handleSyncLoss: clean up after loss of message-boundary sync
+ * handleFatalError: clean up after a nonrecoverable error
*
- * There isn't really a lot we can do here except abandon the connection.
+ * This is for errors where we need to abandon the connection. The caller has
+ * already saved the error message in conn->errorMessage.
*/
static void
-handleSyncLoss(PGconn *conn, char id, int msgLength)
+handleFatalError(PGconn *conn)
{
- libpq_append_conn_error(conn, "lost synchronization with server: got message type \"%c\", length %d",
- id, msgLength);
/* build an error result holding the error message */
pqSaveErrorResult(conn);
conn->asyncStatus = PGASYNC_READY; /* drop out of PQgetResult wait loop */
@@ -488,6 +493,19 @@ handleSyncLoss(PGconn *conn, char id, int msgLength)
}
/*
+ * handleSyncLoss: clean up after loss of message-boundary sync
+ *
+ * There isn't really a lot we can do here except abandon the connection.
+ */
+static void
+handleSyncLoss(PGconn *conn, char id, int msgLength)
+{
+ libpq_append_conn_error(conn, "lost synchronization with server: got message type \"%c\", length %d",
+ id, msgLength);
+ handleFatalError(conn);
+}
+
+/*
* parseInput subroutine to read a 'T' (row descriptions) message.
* We'll build a new PGresult structure (unless called for a Describe
* command for a prepared statement) containing the attribute data.
@@ -1519,7 +1537,11 @@ getParameterStatus(PGconn *conn)
return EOF;
}
/* And save it */
- pqSaveParameterStatus(conn, conn->workBuffer.data, valueBuf.data);
+ if (!pqSaveParameterStatus(conn, conn->workBuffer.data, valueBuf.data))
+ {
+ libpq_append_conn_error(conn, "out of memory");
+ handleFatalError(conn);
+ }
termPQExpBuffer(&valueBuf);
return 0;
}
@@ -1547,12 +1569,33 @@ getBackendKeyData(PGconn *conn, int msgLength)
cancel_key_len = 5 + msgLength - (conn->inCursor - conn->inStart);
+ if (cancel_key_len != 4 && conn->pversion == PG_PROTOCOL(3, 0))
+ {
+ libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key with length %d not allowed in protocol version 3.0 (must be 4 bytes)", cancel_key_len);
+ handleFatalError(conn);
+ return 0;
+ }
+
+ if (cancel_key_len < 4)
+ {
+ libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key with length %d is too short (minimum 4 bytes)", cancel_key_len);
+ handleFatalError(conn);
+ return 0;
+ }
+
+ if (cancel_key_len > 256)
+ {
+ libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key with length %d is too long (maximum 256 bytes)", cancel_key_len);
+ handleFatalError(conn);
+ return 0;
+ }
+
conn->be_cancel_key = malloc(cancel_key_len);
if (conn->be_cancel_key == NULL)
{
libpq_append_conn_error(conn, "out of memory");
- /* discard the message */
- return EOF;
+ handleFatalError(conn);
+ return 0;
}
if (pqGetnchar(conn->be_cancel_key, cancel_key_len, conn))
{
@@ -1589,7 +1632,17 @@ getNotify(PGconn *conn)
/* must save name while getting extra string */
svname = strdup(conn->workBuffer.data);
if (!svname)
- return EOF;
+ {
+ /*
+ * Notify messages can arrive in any state, so we cannot associate the
+ * error with any particular query. There's no way to report an
+ * "async error", so the best we can do is drop the connection. That
+ * seems better than silently ignoring the notification.
+ */
+ libpq_append_conn_error(conn, "out of memory");
+ handleFatalError(conn);
+ return 0;
+ }
if (pqGets(&conn->workBuffer, conn))
{
free(svname);
@@ -1604,21 +1657,26 @@ getNotify(PGconn *conn)
nmlen = strlen(svname);
extralen = strlen(conn->workBuffer.data);
newNotify = (PGnotify *) malloc(sizeof(PGnotify) + nmlen + extralen + 2);
- if (newNotify)
- {
- newNotify->relname = (char *) newNotify + sizeof(PGnotify);
- strcpy(newNotify->relname, svname);
- newNotify->extra = newNotify->relname + nmlen + 1;
- strcpy(newNotify->extra, conn->workBuffer.data);
- newNotify->be_pid = be_pid;
- newNotify->next = NULL;
- if (conn->notifyTail)
- conn->notifyTail->next = newNotify;
- else
- conn->notifyHead = newNotify;
- conn->notifyTail = newNotify;
+ if (!newNotify)
+ {
+ free(svname);
+ libpq_append_conn_error(conn, "out of memory");
+ handleFatalError(conn);
+ return 0;
}
+ newNotify->relname = (char *) newNotify + sizeof(PGnotify);
+ strcpy(newNotify->relname, svname);
+ newNotify->extra = newNotify->relname + nmlen + 1;
+ strcpy(newNotify->extra, conn->workBuffer.data);
+ newNotify->be_pid = be_pid;
+ newNotify->next = NULL;
+ if (conn->notifyTail)
+ conn->notifyTail->next = newNotify;
+ else
+ conn->notifyHead = newNotify;
+ conn->notifyTail = newNotify;
+
free(svname);
return 0;
}
@@ -1752,12 +1810,12 @@ getCopyDataMessage(PGconn *conn)
conn))
{
/*
- * XXX add some better recovery code... plan is to skip over
- * the message using its length, then report an error. For the
- * moment, just treat this like loss of sync (which indeed it
- * might be!)
+ * Abandon the connection. There's not much else we can
+ * safely do; we can't just ignore the message or we could
+ * miss important changes to the connection state.
+ * pqCheckInBufferSpace() already reported the error.
*/
- handleSyncLoss(conn, id, msgLength);
+ handleFatalError(conn);
return -2;
}
return 0;
@@ -2186,12 +2244,12 @@ pqFunctionCall3(PGconn *conn, Oid fnid,
conn))
{
/*
- * XXX add some better recovery code... plan is to skip over
- * the message using its length, then report an error. For the
- * moment, just treat this like loss of sync (which indeed it
- * might be!)
+ * Abandon the connection. There's not much else we can
+ * safely do; we can't just ignore the message or we could
+ * miss important changes to the connection state.
+ * pqCheckInBufferSpace() already reported the error.
*/
- handleSyncLoss(conn, id, msgLength);
+ handleFatalError(conn);
break;
}
continue;
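
Because several of these out-of-memory paths now abandon the connection instead of silently dropping data such as a notification, clients that poll for notifies should check the connection status after PQconsumeInput(). A minimal libpq usage sketch (assumes connection settings in the environment; my_channel is an arbitrary name):

#include <stdio.h>
#include <libpq-fe.h>

int
main(void)
{
	PGconn	   *conn = PQconnectdb("");	/* settings taken from environment */
	PGnotify   *note;

	if (PQstatus(conn) != CONNECTION_OK)
	{
		fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
		PQfinish(conn);
		return 1;
	}

	PQclear(PQexec(conn, "LISTEN my_channel"));

	/* One polling iteration; a real client would loop around select() */
	if (!PQconsumeInput(conn) || PQstatus(conn) == CONNECTION_BAD)
	{
		/* fatal errors (including the OOM cases above) now end up here */
		fprintf(stderr, "connection lost: %s", PQerrorMessage(conn));
		PQfinish(conn);
		return 1;
	}

	while ((note = PQnotifies(conn)) != NULL)
	{
		printf("notify \"%s\" from backend %d\n", note->relname, note->be_pid);
		PQfreemem(note);
	}

	PQfinish(conn);
	return 0;
}
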
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index a701c25038a..02c114f1405 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -746,7 +746,7 @@ extern PGresult *pqPrepareAsyncResult(PGconn *conn);
extern void pqInternalNotice(const PGNoticeHooks *hooks, const char *fmt,...) pg_attribute_printf(2, 3);
extern void pqSaveMessageField(PGresult *res, char code,
const char *value);
-extern void pqSaveParameterStatus(PGconn *conn, const char *name,
+extern int pqSaveParameterStatus(PGconn *conn, const char *name,
const char *value);
extern int pqRowProcessor(PGconn *conn, const char **errmsgp);
extern void pqCommandQueueAdvance(PGconn *conn, bool isReadyForQuery,
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index 08984dd98f1..b33e06a0d3d 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -3567,12 +3567,15 @@ SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment F
-- filenode function call can return NULL for a relation dropped concurrently
-- with the call's surrounding query, so ignore a NULL mapped_oid for
-- relations that no longer exist after all calls finish.
+-- Temporary relations are ignored, as they are not supported by pg_filenode_relation().
CREATE TEMP TABLE filenode_mapping AS
SELECT
oid, mapped_oid, reltablespace, relfilenode, relname
FROM pg_class,
pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) AS mapped_oid
-WHERE relkind IN ('r', 'i', 'S', 't', 'm') AND mapped_oid IS DISTINCT FROM oid;
+WHERE relkind IN ('r', 'i', 'S', 't', 'm')
+ AND relpersistence != 't'
+ AND mapped_oid IS DISTINCT FROM oid;
SELECT m.* FROM filenode_mapping m LEFT JOIN pg_class c ON c.oid = m.oid
WHERE c.oid IS NOT NULL OR m.mapped_oid IS NOT NULL;
oid | mapped_oid | reltablespace | relfilenode | relname
diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out
index 76604705a93..029beb20aae 100644
--- a/src/test/regress/expected/create_table.out
+++ b/src/test/regress/expected/create_table.out
@@ -102,6 +102,18 @@ ERROR: tables declared WITH OIDS are not supported
-- but explicitly not adding oids is still supported
CREATE TEMP TABLE withoutoid() WITHOUT OIDS; DROP TABLE withoutoid;
CREATE TEMP TABLE withoutoid() WITH (oids = false); DROP TABLE withoutoid;
+-- temporary tables are ignored by pg_filenode_relation().
+CREATE TEMP TABLE relation_filenode_check(c1 int);
+SELECT relpersistence,
+ pg_filenode_relation (reltablespace, pg_relation_filenode(oid))
+ FROM pg_class
+ WHERE relname = 'relation_filenode_check';
+ relpersistence | pg_filenode_relation
+----------------+----------------------
+ t |
+(1 row)
+
+DROP TABLE relation_filenode_check;
-- check restriction with default expressions
-- invalid use of column reference in default expressions
CREATE TABLE default_expr_column (id int DEFAULT (id));
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql
index fc6e36d0e78..90bf5c17682 100644
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -2202,13 +2202,15 @@ SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment F
-- filenode function call can return NULL for a relation dropped concurrently
-- with the call's surrounding query, so ignore a NULL mapped_oid for
-- relations that no longer exist after all calls finish.
+-- Temporary relations are ignored, as they are not supported by pg_filenode_relation().
CREATE TEMP TABLE filenode_mapping AS
SELECT
oid, mapped_oid, reltablespace, relfilenode, relname
FROM pg_class,
pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) AS mapped_oid
-WHERE relkind IN ('r', 'i', 'S', 't', 'm') AND mapped_oid IS DISTINCT FROM oid;
-
+WHERE relkind IN ('r', 'i', 'S', 't', 'm')
+ AND relpersistence != 't'
+ AND mapped_oid IS DISTINCT FROM oid;
SELECT m.* FROM filenode_mapping m LEFT JOIN pg_class c ON c.oid = m.oid
WHERE c.oid IS NOT NULL OR m.mapped_oid IS NOT NULL;
diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql
index 37a227148e9..9b3e93b4164 100644
--- a/src/test/regress/sql/create_table.sql
+++ b/src/test/regress/sql/create_table.sql
@@ -68,6 +68,14 @@ CREATE TABLE withoid() WITH (oids = true);
CREATE TEMP TABLE withoutoid() WITHOUT OIDS; DROP TABLE withoutoid;
CREATE TEMP TABLE withoutoid() WITH (oids = false); DROP TABLE withoutoid;
+-- temporary tables are ignored by pg_filenode_relation().
+CREATE TEMP TABLE relation_filenode_check(c1 int);
+SELECT relpersistence,
+ pg_filenode_relation (reltablespace, pg_relation_filenode(oid))
+ FROM pg_class
+ WHERE relname = 'relation_filenode_check';
+DROP TABLE relation_filenode_check;
+
-- check restriction with default expressions
-- invalid use of column reference in default expressions
CREATE TABLE default_expr_column (id int DEFAULT (id));