-rw-r--r--  contrib/postgres_fdw/expected/postgres_fdw.out      8
-rw-r--r--  doc/src/sgml/config.sgml                           12
-rw-r--r--  doc/src/sgml/rules.sgml                             1
-rw-r--r--  src/backend/access/heap/heapam.c                    3
-rw-r--r--  src/backend/access/heap/heapam_xlog.c               1
-rw-r--r--  src/backend/access/heap/vacuumlazy.c               14
-rw-r--r--  src/backend/access/transam/twophase.c              61
-rw-r--r--  src/backend/commands/subscriptioncmds.c            12
-rw-r--r--  src/backend/commands/tablecmds.c                   12
-rw-r--r--  src/backend/executor/functions.c                    2
-rw-r--r--  src/backend/optimizer/plan/subselect.c              2
-rw-r--r--  src/backend/parser/README                           1
-rw-r--r--  src/backend/parser/analyze.c                        7
-rw-r--r--  src/backend/replication/logical/tablesync.c        26
-rw-r--r--  src/backend/replication/logical/worker.c           25
-rw-r--r--  src/backend/replication/walsender.c                12
-rw-r--r--  src/backend/rewrite/rewriteSearchCycle.c            8
-rw-r--r--  src/backend/storage/page/meson.build               10
-rw-r--r--  src/backend/tcop/postgres.c                         7
-rw-r--r--  src/backend/utils/activity/pgstat.c                11
-rw-r--r--  src/backend/utils/activity/pgstat_shmem.c          28
-rw-r--r--  src/backend/utils/misc/guc_parameters.dat           6
-rw-r--r--  src/backend/utils/misc/guc_tables.c                 1
-rw-r--r--  src/backend/utils/misc/postgresql.conf.sample       1
-rw-r--r--  src/bin/pg_dump/pg_dump.c                          80
-rw-r--r--  src/bin/pg_upgrade/Makefile                         3
-rw-r--r--  src/bin/pg_upgrade/info.c                          11
-rw-r--r--  src/bin/pg_upgrade/pg_upgrade.c                     6
-rw-r--r--  src/bin/pg_upgrade/t/006_transfer_modes.pl         67
-rw-r--r--  src/include/access/heapam_xlog.h                    1
-rw-r--r--  src/include/access/twophase.h                       2
-rw-r--r--  src/include/replication/worker_internal.h           1
-rw-r--r--  src/include/utils/guc.h                             1
-rw-r--r--  src/test/regress/expected/memoize.out               8
-rw-r--r--  src/test/regress/expected/partition_prune.out       4
-rw-r--r--  src/test/regress/expected/rangefuncs.out            8
-rw-r--r--  src/test/regress/expected/subselect.out            14
-rw-r--r--  src/test/regress/expected/union.out                14
-rw-r--r--  src/test/subscription/Makefile                      4
-rw-r--r--  src/test/subscription/meson.build                   5
-rw-r--r--  src/test/subscription/t/035_conflicts.pl          189
41 files changed, 604 insertions(+), 85 deletions(-)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 78b8367d289..18d727d7790 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -5086,13 +5086,13 @@ SELECT ft1.c1 FROM ft1 JOIN ft2 on ft1.c1 = ft2.c1 WHERE
-- ===================================================================
EXPLAIN (verbose, costs off)
INSERT INTO ft2 (c1,c2,c3) SELECT c1+1000,c2+100, c3 || c3 FROM ft2 LIMIT 20;
- QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Insert on public.ft2
Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
Batch Size: 1
- -> Subquery Scan on "*SELECT*"
- Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1", NULL::integer, "*SELECT*"."?column?_2", NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying(10), 'ft2 '::character(10), NULL::user_enum
+ -> Subquery Scan on unnamed_subquery
+ Output: unnamed_subquery."?column?", unnamed_subquery."?column?_1", NULL::integer, unnamed_subquery."?column?_2", NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying(10), 'ft2 '::character(10), NULL::user_enum
-> Foreign Scan on public.ft2 ft2_1
Output: (ft2_1.c1 + 1000), (ft2_1.c2 + 100), (ft2_1.c3 || ft2_1.c3)
Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" LIMIT 20::bigint
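
The alias change in this hunk is user-visible in EXPLAIN output: subqueries that the parser adds internally no longer get invented "*SELECT*"-style names (see the functions.c, analyze.c, and subselect.c hunks below). A minimal illustration, reusing the query from this hunk:

    EXPLAIN (VERBOSE, COSTS OFF)
    INSERT INTO ft2 (c1, c2, c3)
      SELECT c1 + 1000, c2 + 100, c3 || c3 FROM ft2 LIMIT 20;
    -- The Subquery Scan that was previously labeled "*SELECT*" now shows
    -- up as "unnamed_subquery"; numbered variants become
    -- "unnamed_subquery_1", "unnamed_subquery_2", and so on.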
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 0a4b3e55ba5..2a3685f474a 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -7383,6 +7383,11 @@ local0.* /var/log/postgresql
</varlistentry>
<varlistentry id="guc-debug-print-parse">
+ <term><varname>debug_print_raw_parse</varname> (<type>boolean</type>)
+ <indexterm>
+ <primary><varname>debug_print_raw_parse</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
<term><varname>debug_print_parse</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>debug_print_parse</varname> configuration parameter</primary>
@@ -7401,8 +7406,8 @@ local0.* /var/log/postgresql
<listitem>
<para>
These parameters enable various debugging output to be emitted.
- When set, they print the resulting parse tree, the query rewriter
- output, or the execution plan for each executed query.
+      When set, they print the raw parse tree, the analyzed parse tree,
+      the query rewriter output, or the execution plan for each executed query.
These messages are emitted at <literal>LOG</literal> message level, so by
default they will appear in the server log but will not be sent to the
client. You can change that by adjusting
@@ -7422,7 +7427,8 @@ local0.* /var/log/postgresql
<listitem>
<para>
When set, <varname>debug_pretty_print</varname> indents the messages
- produced by <varname>debug_print_parse</varname>,
+ produced by <varname>debug_print_raw_parse</varname>,
+ <varname>debug_print_parse</varname>,
<varname>debug_print_rewritten</varname>, or
<varname>debug_print_plan</varname>. This results in more readable
but much longer output than the <quote>compact</quote> format used when
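
A quick way to exercise the new setting from psql (a sketch; any statement will do):

    SET debug_print_raw_parse = on;
    SET debug_pretty_print = on;
    SELECT 1;
    -- The raw parse tree is now emitted in the server log at LOG level,
    -- indented because debug_pretty_print is on.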
diff --git a/doc/src/sgml/rules.sgml b/doc/src/sgml/rules.sgml
index 8467d961fd0..282dcd722d4 100644
--- a/doc/src/sgml/rules.sgml
+++ b/doc/src/sgml/rules.sgml
@@ -60,6 +60,7 @@
<acronym>SQL</acronym> statement where the single parts that it is
built from are stored separately. These query trees can be shown
in the server log if you set the configuration parameters
+ <varname>debug_print_raw_parse</varname>,
<varname>debug_print_parse</varname>,
<varname>debug_print_rewritten</varname>, or
<varname>debug_print_plan</varname>. The rule actions are also
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e3e7307ef5f..4c5ae205a7a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2647,9 +2647,6 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
*/
if (all_frozen_set)
{
- Assert(PageIsAllVisible(page));
- Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
-
/*
* It's fine to use InvalidTransactionId here - this is only used
* when HEAP_INSERT_FROZEN is specified, which intentionally
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 5d48f071f53..cf843277938 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -295,7 +295,6 @@ heap_xlog_visible(XLogReaderState *record)
LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
reln = CreateFakeRelcacheEntry(rlocator);
- visibilitymap_pin(reln, blkno, &vmbuffer);
visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
xlrec->snapshotConflictHorizon, vmbits);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 932701d8420..981d9380a92 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -2121,8 +2121,11 @@ lazy_scan_prune(LVRelState *vacrel,
else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
{
- elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
- vacrel->relname, blkno);
+ ereport(WARNING,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
+ vacrel->relname, blkno)));
+
visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
VISIBILITYMAP_VALID_BITS);
}
@@ -2143,8 +2146,11 @@ lazy_scan_prune(LVRelState *vacrel,
*/
else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
{
- elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
- vacrel->relname, blkno);
+ ereport(WARNING,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
+ vacrel->relname, blkno)));
+
PageClearAllVisible(page);
MarkBufferDirty(buf);
visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
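
Because these warnings now go through ereport() with ERRCODE_DATA_CORRUPTED, they carry SQLSTATE XX001 rather than the generic XX000 that a plain elog() reports, so log monitoring can filter on the error code. A sketch of how one would surface, assuming a table tab whose visibility map is inconsistent:

    VACUUM (VERBOSE) tab;
    -- WARNING:  page is not marked all-visible but visibility map bit is
    -- set in relation "tab" page 0
    -- (reported with SQLSTATE XX001)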
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 7918176fc58..d8e2fce2c99 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -103,6 +103,7 @@
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
+#include "utils/injection_point.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
@@ -2332,12 +2333,17 @@ RecordTransactionCommitPrepared(TransactionId xid,
replorigin = (replorigin_session_origin != InvalidRepOriginId &&
replorigin_session_origin != DoNotReplicateId);
+ /* Load the injection point before entering the critical section */
+ INJECTION_POINT_LOAD("commit-after-delay-checkpoint");
+
START_CRIT_SECTION();
/* See notes in RecordTransactionCommit */
Assert((MyProc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0);
MyProc->delayChkptFlags |= DELAY_CHKPT_IN_COMMIT;
+ INJECTION_POINT_CACHED("commit-after-delay-checkpoint", NULL);
+
/*
* Ensures the DELAY_CHKPT_IN_COMMIT flag write is globally visible before
* commit time is written.
@@ -2809,3 +2815,58 @@ LookupGXactBySubid(Oid subid)
return found;
}
+
+/*
+ * TwoPhaseGetOldestXidInCommit
+ * Return the oldest transaction ID from prepared transactions that are
+ * currently in the commit critical section.
+ *
+ * This function only considers transactions in the currently connected
+ * database. If no matching transactions are found, it returns
+ * InvalidTransactionId.
+ */
+TransactionId
+TwoPhaseGetOldestXidInCommit(void)
+{
+ TransactionId oldestRunningXid = InvalidTransactionId;
+
+ LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
+
+ for (int i = 0; i < TwoPhaseState->numPrepXacts; i++)
+ {
+ GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
+ PGPROC *commitproc;
+ TransactionId xid;
+
+ if (!gxact->valid)
+ continue;
+
+ if (gxact->locking_backend == INVALID_PROC_NUMBER)
+ continue;
+
+ /*
+ * Get the backend that is handling the transaction. It's safe to
+ * access this backend while holding TwoPhaseStateLock, as the backend
+ * can only be destroyed after either removing or unlocking the
+ * current global transaction, both of which require an exclusive
+ * TwoPhaseStateLock.
+ */
+ commitproc = GetPGProcByNumber(gxact->locking_backend);
+
+ if (MyDatabaseId != commitproc->databaseId)
+ continue;
+
+ if ((commitproc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0)
+ continue;
+
+ xid = XidFromFullTransactionId(gxact->fxid);
+
+ if (!TransactionIdIsValid(oldestRunningXid) ||
+ TransactionIdPrecedes(xid, oldestRunningXid))
+ oldestRunningXid = xid;
+ }
+
+ LWLockRelease(TwoPhaseStateLock);
+
+ return oldestRunningXid;
+}
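
The injection point added above fires inside COMMIT PREPARED, after DELAY_CHKPT_IN_COMMIT is set but before the commit record is written, and TwoPhaseGetOldestXidInCommit() is what the walsender change below uses to account for such transactions. The code path is reached by the ordinary two-phase sequence, as in the TAP test at the end of this patch:

    BEGIN;
    UPDATE tab SET b = 2 WHERE a = 1;
    PREPARE TRANSACTION 'txn_with_later_commit_ts';
    -- COMMIT PREPARED goes through RecordTransactionCommitPrepared():
    COMMIT PREPARED 'txn_with_later_commit_ts';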
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 82cf65fae73..750d262fcca 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -854,7 +854,17 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
pgstat_create_subscription(subid);
- if (opts.enabled)
+ /*
+ * Notify the launcher to start the apply worker if the subscription is
+ * enabled, or to create the conflict detection slot if retain_dead_tuples
+ * is enabled.
+ *
+ * Creating the conflict detection slot is essential even when the
+ * subscription is not enabled. This ensures that dead tuples are
+ * retained, which is necessary for accurately identifying the type of
+ * conflict during replication.
+ */
+ if (opts.enabled || opts.retaindeadtuples)
ApplyLauncherWakeupAtCommit();
ObjectAddressSet(myself, SubscriptionRelationId, subid);
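
A sketch of the case the new condition covers; the connection string and object names are illustrative, and retain_dead_tuples is taken to be the SQL-level spelling of opts.retaindeadtuples:

    CREATE SUBSCRIPTION sub CONNECTION 'dbname=src' PUBLICATION pub
        WITH (enabled = false, retain_dead_tuples = true);
    -- Although the subscription is disabled, the launcher is still woken
    -- at commit so it can create the pg_conflict_detection slot.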
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 082a3575d62..3be2e051d32 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -42,6 +42,7 @@
#include "catalog/pg_foreign_table.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_largeobject.h"
+#include "catalog/pg_largeobject_metadata.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_policy.h"
@@ -2389,12 +2390,15 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
/*
* Most system catalogs can't be truncated at all, or at least not unless
* allow_system_table_mods=on. As an exception, however, we allow
- * pg_largeobject to be truncated as part of pg_upgrade, because we need
- * to change its relfilenode to match the old cluster, and allowing a
- * TRUNCATE command to be executed is the easiest way of doing that.
+ * pg_largeobject and pg_largeobject_metadata to be truncated as part of
+ * pg_upgrade, because we need to change their relfilenodes to match the
+ * old cluster, and allowing a TRUNCATE command to be executed is the
+ * easiest way of doing that.
*/
if (!allowSystemTableMods && IsSystemClass(relid, reltuple)
- && (!IsBinaryUpgrade || relid != LargeObjectRelationId))
+ && (!IsBinaryUpgrade ||
+ (relid != LargeObjectRelationId &&
+ relid != LargeObjectMetadataRelationId)))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("permission denied: \"%s\" is a system catalog",
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index 97455b1ed4a..630d708d2a3 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -2483,7 +2483,7 @@ tlist_coercion_finished:
rte = makeNode(RangeTblEntry);
rte->rtekind = RTE_SUBQUERY;
rte->subquery = parse;
- rte->eref = rte->alias = makeAlias("*SELECT*", colnames);
+ rte->eref = makeAlias("unnamed_subquery", colnames);
rte->lateral = false;
rte->inh = false;
rte->inFromCl = true;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d71ed958e31..fae18548e07 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1397,7 +1397,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
*/
nsitem = addRangeTableEntryForSubquery(pstate,
subselect,
- makeAlias("ANY_subquery", NIL),
+ NULL,
use_lateral,
false);
rte = nsitem->p_rte;
diff --git a/src/backend/parser/README b/src/backend/parser/README
index e0c986a41ef..e26eb437a9f 100644
--- a/src/backend/parser/README
+++ b/src/backend/parser/README
@@ -20,6 +20,7 @@ parse_cte.c handle Common Table Expressions (WITH clauses)
parse_expr.c handle expressions like col, col + 3, x = 3 or x = 4
parse_enr.c handle ephemeral named rels (trigger transition tables, ...)
parse_func.c handle functions, table.column and column identifiers
+parse_jsontable.c handle JSON_TABLE
parse_merge.c handle MERGE
parse_node.c create nodes for various structures
parse_oper.c handle operators in expressions
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 34f7c17f576..b9763ea1714 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -777,7 +777,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
*/
nsitem = addRangeTableEntryForSubquery(pstate,
selectQuery,
- makeAlias("*SELECT*", NIL),
+ NULL,
false,
false);
addNSItemToQuery(pstate, nsitem, true, false, false);
@@ -2100,7 +2100,6 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
{
/* Process leaf SELECT */
Query *selectQuery;
- char selectName[32];
ParseNamespaceItem *nsitem;
RangeTblRef *rtr;
ListCell *tl;
@@ -2156,11 +2155,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
/*
* Make the leaf query be a subquery in the top-level rangetable.
*/
- snprintf(selectName, sizeof(selectName), "*SELECT* %d",
- list_length(pstate->p_rtable) + 1);
nsitem = addRangeTableEntryForSubquery(pstate,
selectQuery,
- makeAlias(selectName, NIL),
+ NULL,
false,
false);
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index d3356bc84ee..e6da4028d39 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -1789,6 +1789,32 @@ AllTablesyncsReady(void)
}
/*
+ * Return whether the subscription currently has any relations.
+ *
+ * Note: Unlike HasSubscriptionRelations(), this function relies on cached
+ * information for subscription relations. Additionally, it should not be
+ * invoked outside of apply or tablesync workers, as MySubscription must be
+ * initialized first.
+ */
+bool
+HasSubscriptionRelationsCached(void)
+{
+ bool started_tx;
+ bool has_subrels;
+
+ /* We need up-to-date subscription tables info here */
+ has_subrels = FetchTableStates(&started_tx);
+
+ if (started_tx)
+ {
+ CommitTransactionCommand();
+ pgstat_report_stat(true);
+ }
+
+ return has_subrels;
+}
+
+/*
* Update the two_phase state of the specified subscription in pg_subscription.
*/
void
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index f1ebd63e792..c0f6bef5c28 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -4595,11 +4595,28 @@ wait_for_local_flush(RetainDeadTuplesData *rdt_data)
* workers is complex and not worth the effort, so we simply return if not
* all tables are in the READY state.
*
- * It is safe to add new tables with initial states to the subscription
- * after this check because any changes applied to these tables should
- * have a WAL position greater than the rdt_data->remote_lsn.
+ * Advancing the transaction ID is necessary even when no tables are
+ * currently subscribed, to avoid retaining dead tuples unnecessarily.
+ * While it might seem safe to skip all phases and directly assign
+ * candidate_xid to oldest_nonremovable_xid during the
+ * RDT_GET_CANDIDATE_XID phase in such cases, this is unsafe. If users
+ * concurrently add tables to the subscription, the apply worker may not
+ * process invalidations in time. Consequently,
+ * HasSubscriptionRelationsCached() might miss the new tables, leading to
+ * premature advancement of oldest_nonremovable_xid.
+ *
+ * Performing the check during RDT_WAIT_FOR_LOCAL_FLUSH is safe, as
+ * invalidations are guaranteed to be processed before applying changes
+ * from newly added tables while waiting for the local flush to reach
+ * remote_lsn.
+ *
+ * Additionally, even if we check for subscription tables during
+ * RDT_GET_CANDIDATE_XID, they might be dropped before reaching
+ * RDT_WAIT_FOR_LOCAL_FLUSH. Therefore, it's still necessary to verify
+ * subscription tables at this stage to prevent unnecessary tuple
+ * retention.
*/
- if (!AllTablesyncsReady())
+ if (HasSubscriptionRelationsCached() && !AllTablesyncsReady())
{
TimestampTz now;
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e3dce9dc68d..59822f22b8d 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -51,6 +51,7 @@
#include "access/timeline.h"
#include "access/transam.h"
+#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogreader.h"
@@ -2719,6 +2720,7 @@ ProcessStandbyPSRequestMessage(void)
{
XLogRecPtr lsn = InvalidXLogRecPtr;
TransactionId oldestXidInCommit;
+ TransactionId oldestGXidInCommit;
FullTransactionId nextFullXid;
FullTransactionId fullOldestXidInCommit;
WalSnd *walsnd = MyWalSnd;
@@ -2746,6 +2748,16 @@ ProcessStandbyPSRequestMessage(void)
* ones replicated.
*/
oldestXidInCommit = GetOldestActiveTransactionId(true, false);
+ oldestGXidInCommit = TwoPhaseGetOldestXidInCommit();
+
+ /*
+ * Update the oldest xid for standby transmission if an older prepared
+ * transaction exists and is currently in the commit phase.
+ */
+ if (TransactionIdIsValid(oldestGXidInCommit) &&
+ TransactionIdPrecedes(oldestGXidInCommit, oldestXidInCommit))
+ oldestXidInCommit = oldestGXidInCommit;
+
nextFullXid = ReadNextFullTransactionId();
fullOldestXidInCommit = FullTransactionIdFromAllowableAt(nextFullXid,
oldestXidInCommit);
diff --git a/src/backend/rewrite/rewriteSearchCycle.c b/src/backend/rewrite/rewriteSearchCycle.c
index 9f95d4dc1b0..5202ef43d10 100644
--- a/src/backend/rewrite/rewriteSearchCycle.c
+++ b/src/backend/rewrite/rewriteSearchCycle.c
@@ -282,8 +282,8 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
newrte = makeNode(RangeTblEntry);
newrte->rtekind = RTE_SUBQUERY;
- newrte->alias = makeAlias("*TLOCRN*", cte->ctecolnames);
- newrte->eref = newrte->alias;
+ newrte->alias = NULL;
+ newrte->eref = makeAlias("*TLOCRN*", cte->ctecolnames);
newsubquery = copyObject(rte1->subquery);
IncrementVarSublevelsUp((Node *) newsubquery, 1, 1);
newrte->subquery = newsubquery;
@@ -379,8 +379,8 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
ewcl = lappend(ewcl, makeString(cte->cycle_clause->cycle_mark_column));
ewcl = lappend(ewcl, makeString(cte->cycle_clause->cycle_path_column));
}
- newrte->alias = makeAlias("*TROCRN*", ewcl);
- newrte->eref = newrte->alias;
+ newrte->alias = NULL;
+ newrte->eref = makeAlias("*TROCRN*", ewcl);
/*
* Find the reference to the recursive CTE in the right UNION subquery's
diff --git a/src/backend/storage/page/meson.build b/src/backend/storage/page/meson.build
index c3e4a805862..112f00ff365 100644
--- a/src/backend/storage/page/meson.build
+++ b/src/backend/storage/page/meson.build
@@ -1,7 +1,15 @@
# Copyright (c) 2022-2025, PostgreSQL Global Development Group
+checksum_backend_lib = static_library('checksum_backend_lib',
+ 'checksum.c',
+ dependencies: backend_build_deps,
+ kwargs: internal_lib_args,
+ c_args: vectorize_cflags + unroll_loops_cflags,
+)
+
+backend_link_with += checksum_backend_lib
+
backend_sources += files(
'bufpage.c',
- 'checksum.c',
'itemptr.c',
)
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 0cecd464902..d356830f756 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -649,6 +649,10 @@ pg_parse_query(const char *query_string)
TRACE_POSTGRESQL_QUERY_PARSE_DONE(query_string);
+ if (Debug_print_raw_parse)
+ elog_node_display(LOG, "raw parse tree", raw_parsetree_list,
+ Debug_pretty_print);
+
return raw_parsetree_list;
}
@@ -3697,7 +3701,10 @@ set_debug_options(int debug_flag, GucContext context, GucSource source)
if (debug_flag >= 2)
SetConfigOption("log_statement", "all", context, source);
if (debug_flag >= 3)
+ {
+ SetConfigOption("debug_print_raw_parse", "true", context, source);
SetConfigOption("debug_print_parse", "true", context, source);
+ }
if (debug_flag >= 4)
SetConfigOption("debug_print_plan", "true", context, source);
if (debug_flag >= 5)
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index ffb5b8cce34..f8e91484e36 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -1975,6 +1975,17 @@ pgstat_read_statsfile(void)
header = pgstat_init_entry(key.kind, p);
dshash_release_lock(pgStatLocal.shared_hash, p);
+ if (header == NULL)
+ {
+ /*
+ * It would be tempting to switch this ERROR to a
+ * WARNING, but that would mean that all the statistics
+ * are discarded if the allocation fails on OOM.
+ */
+ elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
+ key.kind, key.dboid,
+ key.objid, t);
+ }
if (!read_chunk(fpin,
pgstat_get_entry_data(key.kind, header),
diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c
index 62de3474453..9dc3212f7dd 100644
--- a/src/backend/utils/activity/pgstat_shmem.c
+++ b/src/backend/utils/activity/pgstat_shmem.c
@@ -289,6 +289,13 @@ pgstat_detach_shmem(void)
* ------------------------------------------------------------
*/
+/*
+ * Initialize a newly-created entry.
+ *
+ * Returns NULL on allocation failure, so that callers can take cleanup
+ * actions, as the entry being initialized has already been inserted into
+ * the shared hashtable.
+ */
PgStatShared_Common *
pgstat_init_entry(PgStat_Kind kind,
PgStatShared_HashEntry *shhashent)
@@ -311,7 +318,12 @@ pgstat_init_entry(PgStat_Kind kind,
pg_atomic_init_u32(&shhashent->generation, 0);
shhashent->dropped = false;
- chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
+ chunk = dsa_allocate_extended(pgStatLocal.dsa,
+ pgstat_get_kind_info(kind)->shared_size,
+ DSA_ALLOC_ZERO | DSA_ALLOC_NO_OOM);
+ if (chunk == InvalidDsaPointer)
+ return NULL;
+
shheader = dsa_get_address(pgStatLocal.dsa, chunk);
shheader->magic = 0xdeadbeef;
@@ -509,6 +521,20 @@ pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
if (!shfound)
{
shheader = pgstat_init_entry(kind, shhashent);
+ if (shheader == NULL)
+ {
+ /*
+ * Allocation of a new entry failed, so clean up the
+ * shared hashtable before giving up.
+ */
+ dshash_delete_entry(pgStatLocal.shared_hash, shhashent);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory"),
+ errdetail("Failed while allocating entry %u/%u/%" PRIu64 ".",
+ key.kind, key.dboid, key.objid)));
+ }
pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
if (created_entry != NULL)
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index a157cec3c4d..0da01627cfe 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -414,6 +414,12 @@
ifdef => 'DEBUG_NODE_TESTS_ENABLED',
},
+{ name => 'debug_print_raw_parse', type => 'bool', context => 'PGC_USERSET', group => 'LOGGING_WHAT',
+ short_desc => 'Logs each query\'s raw parse tree.',
+ variable => 'Debug_print_raw_parse',
+ boot_val => 'false',
+},
+
{ name => 'debug_print_parse', type => 'bool', context => 'PGC_USERSET', group => 'LOGGING_WHAT',
short_desc => 'Logs each query\'s parse tree.',
variable => 'Debug_print_parse',
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 787933a9e5a..00c8376cf4d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -507,6 +507,7 @@ bool AllowAlterSystem = true;
bool log_duration = false;
bool Debug_print_plan = false;
bool Debug_print_parse = false;
+bool Debug_print_raw_parse = false;
bool Debug_print_rewritten = false;
bool Debug_pretty_print = true;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index a9d8293474a..26c08693564 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -581,6 +581,7 @@
# - What to Log -
+#debug_print_raw_parse = off
#debug_print_parse = off
#debug_print_rewritten = off
#debug_print_plan = off
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index bea793456f9..b4c45ad803e 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1131,6 +1131,23 @@ main(int argc, char **argv)
shdepend->dataObj->filtercond = "WHERE classid = 'pg_largeobject'::regclass "
"AND dbid = (SELECT oid FROM pg_database "
" WHERE datname = current_database())";
+
+ /*
+ * If upgrading from v16 or newer, only dump large objects with
+ * comments/seclabels. For these upgrades, pg_upgrade can copy/link
+ * pg_largeobject_metadata's files (which is usually faster) but we
+ * still need to dump LOs with comments/seclabels here so that the
+ * subsequent COMMENT and SECURITY LABEL commands work. pg_upgrade
+ * can't copy/link the files from older versions because aclitem
+ * (needed by pg_largeobject_metadata.lomacl) changed its storage
+ * format in v16.
+ */
+ if (fout->remoteVersion >= 160000)
+ lo_metadata->dataObj->filtercond = "WHERE oid IN "
+ "(SELECT objoid FROM pg_description "
+ "WHERE classoid = " CppAsString2(LargeObjectRelationId) " "
+ "UNION SELECT objoid FROM pg_seclabel "
+ "WHERE classoid = " CppAsString2(LargeObjectRelationId) ")";
}
/*
@@ -3629,26 +3646,32 @@ dumpDatabase(Archive *fout)
/*
* pg_largeobject comes from the old system intact, so set its
* relfrozenxids, relminmxids and relfilenode.
+ *
+ * pg_largeobject_metadata also comes from the old system intact for
+ * upgrades from v16 and newer, so set its relfrozenxids, relminmxids, and
+ * relfilenode, too. pg_upgrade can't copy/link the files from older
+ * versions because aclitem (needed by pg_largeobject_metadata.lomacl)
+ * changed its storage format in v16.
*/
if (dopt->binary_upgrade)
{
PGresult *lo_res;
PQExpBuffer loFrozenQry = createPQExpBuffer();
PQExpBuffer loOutQry = createPQExpBuffer();
+ PQExpBuffer lomOutQry = createPQExpBuffer();
PQExpBuffer loHorizonQry = createPQExpBuffer();
+ PQExpBuffer lomHorizonQry = createPQExpBuffer();
int ii_relfrozenxid,
ii_relfilenode,
ii_oid,
ii_relminmxid;
- /*
- * pg_largeobject
- */
if (fout->remoteVersion >= 90300)
appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, relminmxid, relfilenode, oid\n"
"FROM pg_catalog.pg_class\n"
- "WHERE oid IN (%u, %u);\n",
- LargeObjectRelationId, LargeObjectLOidPNIndexId);
+ "WHERE oid IN (%u, %u, %u, %u);\n",
+ LargeObjectRelationId, LargeObjectLOidPNIndexId,
+ LargeObjectMetadataRelationId, LargeObjectMetadataOidIndexId);
else
appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, 0 AS relminmxid, relfilenode, oid\n"
"FROM pg_catalog.pg_class\n"
@@ -3663,35 +3686,57 @@ dumpDatabase(Archive *fout)
ii_oid = PQfnumber(lo_res, "oid");
appendPQExpBufferStr(loHorizonQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n");
+ appendPQExpBufferStr(lomHorizonQry, "\n-- For binary upgrade, set pg_largeobject_metadata relfrozenxid and relminmxid\n");
appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, preserve pg_largeobject and index relfilenodes\n");
+ appendPQExpBufferStr(lomOutQry, "\n-- For binary upgrade, preserve pg_largeobject_metadata and index relfilenodes\n");
for (int i = 0; i < PQntuples(lo_res); ++i)
{
Oid oid;
RelFileNumber relfilenumber;
+ PQExpBuffer horizonQry;
+ PQExpBuffer outQry;
+
+ oid = atooid(PQgetvalue(lo_res, i, ii_oid));
+ relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode));
- appendPQExpBuffer(loHorizonQry, "UPDATE pg_catalog.pg_class\n"
+ if (oid == LargeObjectRelationId ||
+ oid == LargeObjectLOidPNIndexId)
+ {
+ horizonQry = loHorizonQry;
+ outQry = loOutQry;
+ }
+ else
+ {
+ horizonQry = lomHorizonQry;
+ outQry = lomOutQry;
+ }
+
+ appendPQExpBuffer(horizonQry, "UPDATE pg_catalog.pg_class\n"
"SET relfrozenxid = '%u', relminmxid = '%u'\n"
"WHERE oid = %u;\n",
atooid(PQgetvalue(lo_res, i, ii_relfrozenxid)),
atooid(PQgetvalue(lo_res, i, ii_relminmxid)),
atooid(PQgetvalue(lo_res, i, ii_oid)));
- oid = atooid(PQgetvalue(lo_res, i, ii_oid));
- relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode));
-
- if (oid == LargeObjectRelationId)
- appendPQExpBuffer(loOutQry,
+ if (oid == LargeObjectRelationId ||
+ oid == LargeObjectMetadataRelationId)
+ appendPQExpBuffer(outQry,
"SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n",
relfilenumber);
- else if (oid == LargeObjectLOidPNIndexId)
- appendPQExpBuffer(loOutQry,
+ else if (oid == LargeObjectLOidPNIndexId ||
+ oid == LargeObjectMetadataOidIndexId)
+ appendPQExpBuffer(outQry,
"SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
relfilenumber);
}
appendPQExpBufferStr(loOutQry,
"TRUNCATE pg_catalog.pg_largeobject;\n");
+ appendPQExpBufferStr(lomOutQry,
+ "TRUNCATE pg_catalog.pg_largeobject_metadata;\n");
+
appendPQExpBufferStr(loOutQry, loHorizonQry->data);
+ appendPQExpBufferStr(lomOutQry, lomHorizonQry->data);
ArchiveEntry(fout, nilCatalogId, createDumpId(),
ARCHIVE_OPTS(.tag = "pg_largeobject",
@@ -3699,11 +3744,20 @@ dumpDatabase(Archive *fout)
.section = SECTION_PRE_DATA,
.createStmt = loOutQry->data));
+ if (fout->remoteVersion >= 160000)
+ ArchiveEntry(fout, nilCatalogId, createDumpId(),
+ ARCHIVE_OPTS(.tag = "pg_largeobject_metadata",
+ .description = "pg_largeobject_metadata",
+ .section = SECTION_PRE_DATA,
+ .createStmt = lomOutQry->data));
+
PQclear(lo_res);
destroyPQExpBuffer(loFrozenQry);
destroyPQExpBuffer(loHorizonQry);
+ destroyPQExpBuffer(lomHorizonQry);
destroyPQExpBuffer(loOutQry);
+ destroyPQExpBuffer(lomOutQry);
}
PQclear(res);
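
The objects targeted by the new filter condition can be created as in the pg_upgrade TAP test below; only large objects with a comment or security label need dumping, since LO data and ownership/ACLs travel with pg_largeobject_metadata's files for v16+ upgrades:

    SELECT lo_from_bytea(4532, '\xffffff00');
    COMMENT ON LARGE OBJECT 4532 IS 'test';
    -- In binary-upgrade mode against a v16+ source, pg_dump emits the
    -- COMMENT for this LO because its OID appears in pg_description with
    -- classoid pg_largeobject.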
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index f83d2b5d309..69fcf593cae 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -3,8 +3,7 @@
PGFILEDESC = "pg_upgrade - an in-place binary upgrade utility"
PGAPPICON = win32
-# required for 003_upgrade_logical_replication_slots.pl
-EXTRA_INSTALL=contrib/test_decoding
+EXTRA_INSTALL=contrib/test_decoding src/test/modules/dummy_seclabel
subdir = src/bin/pg_upgrade
top_builddir = ../../..
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index c39eb077c2f..7ce08270168 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -498,7 +498,10 @@ get_rel_infos_query(void)
*
* pg_largeobject contains user data that does not appear in pg_dump
* output, so we have to copy that system table. It's easiest to do that
- * by treating it as a user table.
+ * by treating it as a user table. We can do the same for
+ * pg_largeobject_metadata for upgrades from v16 and newer. pg_upgrade
+ * can't copy/link the files from older versions because aclitem (needed
+ * by pg_largeobject_metadata.lomacl) changed its storage format in v16.
*/
appendPQExpBuffer(&query,
"WITH regular_heap (reloid, indtable, toastheap) AS ( "
@@ -514,10 +517,12 @@ get_rel_infos_query(void)
" 'binary_upgrade', 'pg_toast') AND "
" c.oid >= %u::pg_catalog.oid) OR "
" (n.nspname = 'pg_catalog' AND "
- " relname IN ('pg_largeobject') ))), ",
+ " relname IN ('pg_largeobject'%s) ))), ",
(user_opts.transfer_mode == TRANSFER_MODE_SWAP) ?
", " CppAsString2(RELKIND_SEQUENCE) : "",
- FirstNormalObjectId);
+ FirstNormalObjectId,
+ (GET_MAJOR_VERSION(old_cluster.major_version) >= 1600) ?
+ ", 'pg_largeobject_metadata'" : "");
/*
* Add a CTE that collects OIDs of toast tables belonging to the tables
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index d5cd5bf0b3a..490e98fa26f 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -29,9 +29,9 @@
* We control all assignments of pg_enum.oid because these oids are stored
* in user tables as enum values.
*
- * We control all assignments of pg_authid.oid for historical reasons (the
- * oids used to be stored in pg_largeobject_metadata, which is now copied via
- * SQL commands), that might change at some point in the future.
+ * We control all assignments of pg_authid.oid because the oids are stored in
+ * pg_largeobject_metadata, which is copied via file transfer for upgrades
+ * from v16 and newer.
*
* We control all assignments of pg_database.oid because we want the directory
* names to match between the old and new cluster.
diff --git a/src/bin/pg_upgrade/t/006_transfer_modes.pl b/src/bin/pg_upgrade/t/006_transfer_modes.pl
index 348f4021462..2f68f0b56aa 100644
--- a/src/bin/pg_upgrade/t/006_transfer_modes.pl
+++ b/src/bin/pg_upgrade/t/006_transfer_modes.pl
@@ -45,6 +45,22 @@ sub test_mode
$old->append_conf('postgresql.conf', "allow_in_place_tablespaces = true");
}
+ # We can only test security labels if both the old and new installations
+ # have dummy_seclabel.
+ my $test_seclabel = 1;
+ $old->start;
+ if (!$old->check_extension('dummy_seclabel'))
+ {
+ $test_seclabel = 0;
+ }
+ $old->stop;
+ $new->start;
+ if (!$new->check_extension('dummy_seclabel'))
+ {
+ $test_seclabel = 0;
+ }
+ $new->stop;
+
# Create a small variety of simple test objects on the old cluster. We'll
# check that these reach the new version after upgrading.
$old->start;
@@ -83,6 +99,29 @@ sub test_mode
$old->safe_psql('testdb3',
"CREATE TABLE test6 AS SELECT generate_series(607, 711)");
}
+
+ # While we are here, test handling of large objects.
+ $old->safe_psql('postgres', q|
+ CREATE ROLE regress_lo_1;
+ CREATE ROLE regress_lo_2;
+
+ SELECT lo_from_bytea(4532, '\xffffff00');
+ COMMENT ON LARGE OBJECT 4532 IS 'test';
+
+ SELECT lo_from_bytea(4533, '\x0f0f0f0f');
+ ALTER LARGE OBJECT 4533 OWNER TO regress_lo_1;
+ GRANT SELECT ON LARGE OBJECT 4533 TO regress_lo_2;
+ |);
+
+ if ($test_seclabel)
+ {
+ $old->safe_psql('postgres', q|
+ CREATE EXTENSION dummy_seclabel;
+
+ SELECT lo_from_bytea(4534, '\x00ffffff');
+ SECURITY LABEL ON LARGE OBJECT 4534 IS 'classified';
+ |);
+ }
$old->stop;
my $result = command_ok_or_fails_like(
@@ -132,6 +171,34 @@ sub test_mode
$result = $new->safe_psql('testdb3', "SELECT COUNT(*) FROM test6");
is($result, '105', "test6 data after pg_upgrade $mode");
}
+
+ # Tests for large objects
+ $result = $new->safe_psql('postgres', "SELECT lo_get(4532)");
+ is($result, '\xffffff00', "LO contents after upgrade");
+ $result = $new->safe_psql('postgres',
+ "SELECT obj_description(4532, 'pg_largeobject')");
+ is($result, 'test', "comment on LO after pg_upgrade");
+
+ $result = $new->safe_psql('postgres', "SELECT lo_get(4533)");
+ is($result, '\x0f0f0f0f', "LO contents after upgrade");
+ $result = $new->safe_psql('postgres',
+ "SELECT lomowner::regrole FROM pg_largeobject_metadata WHERE oid = 4533");
+ is($result, 'regress_lo_1', "LO owner after upgrade");
+ $result = $new->safe_psql('postgres',
+ "SELECT lomacl FROM pg_largeobject_metadata WHERE oid = 4533");
+ is($result, '{regress_lo_1=rw/regress_lo_1,regress_lo_2=r/regress_lo_1}',
+ "LO ACL after upgrade");
+
+ if ($test_seclabel)
+ {
+ $result = $new->safe_psql('postgres', "SELECT lo_get(4534)");
+ is($result, '\x00ffffff', "LO contents after upgrade");
+ $result = $new->safe_psql('postgres', q|
+ SELECT label FROM pg_seclabel WHERE objoid = 4534
+ AND classoid = 'pg_largeobject'::regclass
+ |);
+ is($result, 'classified', "seclabel on LO after pg_upgrade");
+ }
$new->stop;
}
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 277df6b3cf0..d4c0625b632 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -284,7 +284,6 @@ typedef struct xl_heap_update
*/
typedef struct xl_heap_prune
{
- uint8 reason;
uint8 flags;
/*
diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h
index 509bdad9a5d..64463e9f4af 100644
--- a/src/include/access/twophase.h
+++ b/src/include/access/twophase.h
@@ -68,4 +68,6 @@ extern void TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid_res,
int szgid);
extern bool LookupGXactBySubid(Oid subid);
+extern TransactionId TwoPhaseGetOldestXidInCommit(void);
+
#endif /* TWOPHASE_H */
diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h
index 62ea1a00580..de003802612 100644
--- a/src/include/replication/worker_internal.h
+++ b/src/include/replication/worker_internal.h
@@ -272,6 +272,7 @@ extern void ReplicationOriginNameForLogicalRep(Oid suboid, Oid relid,
char *originname, Size szoriginname);
extern bool AllTablesyncsReady(void);
+extern bool HasSubscriptionRelationsCached(void);
extern void UpdateTwoPhaseState(Oid suboid, char new_state);
extern void process_syncing_tables(XLogRecPtr current_lsn);
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 72981053e61..756e80a2c2f 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -247,6 +247,7 @@ typedef enum
/* GUC vars that are actually defined in guc_tables.c, rather than elsewhere */
extern PGDLLIMPORT bool Debug_print_plan;
extern PGDLLIMPORT bool Debug_print_parse;
+extern PGDLLIMPORT bool Debug_print_raw_parse;
extern PGDLLIMPORT bool Debug_print_rewritten;
extern PGDLLIMPORT bool Debug_pretty_print;
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out
index 150dc1b44cf..fbcaf113266 100644
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -545,15 +545,15 @@ EXPLAIN (COSTS OFF)
SELECT * FROM tab_anti t1 WHERE t1.a IN
(SELECT a FROM tab_anti t2 WHERE t2.b IN
(SELECT t1.b FROM tab_anti t3 WHERE t2.a > 1 OFFSET 0));
- QUERY PLAN
--------------------------------------------------
+ QUERY PLAN
+---------------------------------------------------
Nested Loop Semi Join
-> Seq Scan on tab_anti t1
-> Nested Loop Semi Join
Join Filter: (t1.a = t2.a)
-> Seq Scan on tab_anti t2
- -> Subquery Scan on "ANY_subquery"
- Filter: (t2.b = "ANY_subquery".b)
+ -> Subquery Scan on unnamed_subquery
+ Filter: (t2.b = unnamed_subquery.b)
-> Result
One-Time Filter: (t2.a > 1)
-> Seq Scan on tab_anti t3
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index d1966cd7d82..68ecd951809 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -4763,7 +4763,7 @@ select min(a) over (partition by a order by a) from part_abc where a >= stable_o
QUERY PLAN
----------------------------------------------------------------------------------------------
Append
- -> Subquery Scan on "*SELECT* 1_1"
+ -> Subquery Scan on unnamed_subquery_2
-> WindowAgg
Window: w1 AS (PARTITION BY part_abc.a ORDER BY part_abc.a)
-> Append
@@ -4780,7 +4780,7 @@ select min(a) over (partition by a order by a) from part_abc where a >= stable_o
-> Index Scan using part_abc_3_2_a_idx on part_abc_3_2 part_abc_4
Index Cond: (a >= (stable_one() + 1))
Filter: (d <= stable_one())
- -> Subquery Scan on "*SELECT* 2"
+ -> Subquery Scan on unnamed_subquery_1
-> WindowAgg
Window: w1 AS (PARTITION BY part_abc_5.a ORDER BY part_abc_5.a)
-> Append
diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out
index c21be83aa4a..30241e22da2 100644
--- a/src/test/regress/expected/rangefuncs.out
+++ b/src/test/regress/expected/rangefuncs.out
@@ -2130,10 +2130,10 @@ select testrngfunc();
explain (verbose, costs off)
select * from testrngfunc();
- QUERY PLAN
-----------------------------------------------------------
- Subquery Scan on "*SELECT*"
- Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1"
+ QUERY PLAN
+----------------------------------------------------------------------
+ Subquery Scan on unnamed_subquery
+ Output: unnamed_subquery."?column?", unnamed_subquery."?column?_1"
-> Unique
Output: (1), (2)
-> Sort
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index c16dff05bc1..7a1c216a0b1 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1692,14 +1692,14 @@ select * from int4_tbl o where (f1, f1) in
-------------------------------------------------------------------
Nested Loop Semi Join
Output: o.f1
- Join Filter: (o.f1 = "ANY_subquery".f1)
+ Join Filter: (o.f1 = unnamed_subquery.f1)
-> Seq Scan on public.int4_tbl o
Output: o.f1
-> Materialize
- Output: "ANY_subquery".f1, "ANY_subquery".g
- -> Subquery Scan on "ANY_subquery"
- Output: "ANY_subquery".f1, "ANY_subquery".g
- Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
+ Output: unnamed_subquery.f1, unnamed_subquery.g
+ -> Subquery Scan on unnamed_subquery
+ Output: unnamed_subquery.f1, unnamed_subquery.g
+ Filter: (unnamed_subquery.f1 = unnamed_subquery.g)
-> Result
Output: i.f1, ((generate_series(1, 50)) / 10)
-> ProjectSet
@@ -2867,8 +2867,8 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
-> Memoize
Cache Key: b.hundred, b.odd
Cache Mode: binary
- -> Subquery Scan on "ANY_subquery"
- Filter: (b.hundred = "ANY_subquery".min)
+ -> Subquery Scan on unnamed_subquery
+ Filter: (b.hundred = unnamed_subquery.min)
-> Result
InitPlan 1
-> Limit
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 96962817ed4..d3ea433db15 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -942,7 +942,7 @@ SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1;
ERROR: column "q2" does not exist
LINE 1: ... int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1...
^
-DETAIL: There is a column named "q2" in table "*SELECT* 2", but it cannot be referenced from this part of the query.
+DETAIL: There is a column named "q2" in table "unnamed_subquery", but it cannot be referenced from this part of the query.
-- But this should work:
SELECT q1 FROM int8_tbl EXCEPT (((SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1))) ORDER BY 1;
q1
@@ -1338,14 +1338,14 @@ where q2 = q2;
----------------------------------------------------------
Unique
-> Merge Append
- Sort Key: "*SELECT* 1".q1
- -> Subquery Scan on "*SELECT* 1"
+ Sort Key: unnamed_subquery.q1
+ -> Subquery Scan on unnamed_subquery
-> Unique
-> Sort
Sort Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: (q2 IS NOT NULL)
- -> Subquery Scan on "*SELECT* 2"
+ -> Subquery Scan on unnamed_subquery_1
-> Unique
-> Sort
Sort Key: i82.q1, i82.q2
@@ -1374,14 +1374,14 @@ where -q1 = q2;
--------------------------------------------------------
Unique
-> Merge Append
- Sort Key: "*SELECT* 1".q1
- -> Subquery Scan on "*SELECT* 1"
+ Sort Key: unnamed_subquery.q1
+ -> Subquery Scan on unnamed_subquery
-> Unique
-> Sort
Sort Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: ((- q1) = q2)
- -> Subquery Scan on "*SELECT* 2"
+ -> Subquery Scan on unnamed_subquery_1
-> Unique
-> Sort
Sort Key: i82.q1, i82.q2
diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile
index 50b65d8f6ea..9d97e7d5c0d 100644
--- a/src/test/subscription/Makefile
+++ b/src/test/subscription/Makefile
@@ -13,9 +13,11 @@ subdir = src/test/subscription
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-EXTRA_INSTALL = contrib/hstore
+EXTRA_INSTALL = contrib/hstore \
+ src/test/modules/injection_points
export with_icu
+export enable_injection_points
check:
$(prove_check)
diff --git a/src/test/subscription/meson.build b/src/test/subscription/meson.build
index 586ffba434e..20b4e523d93 100644
--- a/src/test/subscription/meson.build
+++ b/src/test/subscription/meson.build
@@ -5,7 +5,10 @@ tests += {
'sd': meson.current_source_dir(),
'bd': meson.current_build_dir(),
'tap': {
- 'env': {'with_icu': icu.found() ? 'yes' : 'no'},
+ 'env': {
+ 'with_icu': icu.found() ? 'yes' : 'no',
+ 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no',
+ },
'tests': [
't/001_rep_changes.pl',
't/002_types.pl',
diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl
index 51b23a39fa9..db0d5b464e8 100644
--- a/src/test/subscription/t/035_conflicts.pl
+++ b/src/test/subscription/t/035_conflicts.pl
@@ -387,6 +387,195 @@ ok( $logfile =~
'update target row was deleted in tab');
###############################################################################
+# Check that the xmin value of the conflict detection slot can be advanced when
+# the subscription has no tables.
+###############################################################################
+
+# Remove the table from the publication
+$node_B->safe_psql('postgres', "ALTER PUBLICATION tap_pub_B DROP TABLE tab");
+
+$node_A->safe_psql('postgres',
+ "ALTER SUBSCRIPTION $subname_AB REFRESH PUBLICATION");
+
+# Remember the next transaction ID to be assigned
+$next_xid = $node_A->safe_psql('postgres', "SELECT txid_current() + 1;");
+
+# Confirm that the xmin value is advanced to the latest nextXid. If no
+# transactions are running, the apply worker selects nextXid as the candidate
+# for the non-removable xid. See GetOldestActiveTransactionId().
+ok( $node_A->poll_query_until(
+ 'postgres',
+ "SELECT xmin = $next_xid from pg_replication_slots WHERE slot_name = 'pg_conflict_detection'"
+ ),
+ "the xmin value of slot 'pg_conflict_detection' is updated on Node A");
+
+# Re-add the table to the publication for further tests
+$node_B->safe_psql('postgres', "ALTER PUBLICATION tap_pub_B ADD TABLE tab");
+
+$node_A->safe_psql('postgres',
+ "ALTER SUBSCRIPTION $subname_AB REFRESH PUBLICATION WITH (copy_data = false)");
+
+###############################################################################
+# Test that publisher's transactions marked with DELAY_CHKPT_IN_COMMIT prevent
+# concurrently deleted tuples on the subscriber from being removed. This test
+# also acts as a safeguard to prevent developers from moving the commit
+# timestamp acquisition before marking DELAY_CHKPT_IN_COMMIT in
+# RecordTransactionCommitPrepared.
+###############################################################################
+
+my $injection_points_supported = $node_B->check_extension('injection_points');
+
+# This test depends on an injection point to block the prepared transaction
+# commit after marking the DELAY_CHKPT_IN_COMMIT flag.
+if ($injection_points_supported != 0)
+{
+ $node_B->append_conf('postgresql.conf',
+ "shared_preload_libraries = 'injection_points'
+ max_prepared_transactions = 1");
+ $node_B->restart;
+
+ # Disable the subscription on Node B for testing only one-way
+ # replication.
+ $node_B->psql('postgres', "ALTER SUBSCRIPTION $subname_BA DISABLE;");
+
+ # Wait for the apply worker to stop
+ $node_B->poll_query_until('postgres',
+ "SELECT count(*) = 0 FROM pg_stat_activity WHERE backend_type = 'logical replication apply worker'"
+ );
+
+ # Truncate the table to clean up existing dead rows, then insert a new
+ # row.
+ $node_B->safe_psql(
+ 'postgres', qq(
+ TRUNCATE tab;
+ INSERT INTO tab VALUES(1, 1);
+ ));
+
+ $node_B->wait_for_catchup($subname_AB);
+
+ # Create the injection_points extension on the publisher node and attach to the
+ # commit-after-delay-checkpoint injection point.
+ $node_B->safe_psql(
+ 'postgres',
+ "CREATE EXTENSION injection_points;
+ SELECT injection_points_attach('commit-after-delay-checkpoint', 'wait');"
+ );
+
+ # Start a background session on the publisher node to perform an update and
+ # pause at the injection point.
+ my $pub_session = $node_B->background_psql('postgres');
+ $pub_session->query_until(
+ qr/starting_bg_psql/,
+ q{
+ \echo starting_bg_psql
+ BEGIN;
+ UPDATE tab SET b = 2 WHERE a = 1;
+ PREPARE TRANSACTION 'txn_with_later_commit_ts';
+ COMMIT PREPARED 'txn_with_later_commit_ts';
+ }
+ );
+
+ # Confirm the update is suspended
+ $result =
+ $node_B->safe_psql('postgres', 'SELECT * FROM tab WHERE a = 1');
+ is($result, qq(1|1), 'publisher sees the old row');
+
+ # Delete the row on the subscriber. The deleted row should be retained due to a
+ # transaction on the publisher, which is currently marked with the
+ # DELAY_CHKPT_IN_COMMIT flag.
+ $node_A->safe_psql('postgres', "DELETE FROM tab WHERE a = 1;");
+
+ # Get the commit timestamp for the delete
+ my $sub_ts = $node_A->safe_psql('postgres',
+ "SELECT timestamp FROM pg_last_committed_xact();");
+
+ $log_location = -s $node_A->logfile;
+
+ # Confirm that the apply worker keeps requesting publisher status while
+ # waiting for the prepared transaction to commit; the request message
+ # should therefore appear in the log more than once.
+ $node_A->wait_for_log(
+ qr/sending publisher status request message/,
+ $log_location);
+
+ $log_location = -s $node_A->logfile;
+
+ $node_A->wait_for_log(
+ qr/sending publisher status request message/,
+ $log_location);
+
+ # Confirm that the dead tuple cannot be removed
+ ($cmdret, $stdout, $stderr) =
+ $node_A->psql('postgres', qq(VACUUM (verbose) public.tab;));
+
+ ok($stderr =~ qr/1 are dead but not yet removable/,
+ 'the deleted row is non-removable');
+
+ $log_location = -s $node_A->logfile;
+
+ # Wakeup and detach the injection point on the publisher node. The prepared
+ # transaction should now commit.
+ $node_B->safe_psql(
+ 'postgres',
+ "SELECT injection_points_wakeup('commit-after-delay-checkpoint');
+ SELECT injection_points_detach('commit-after-delay-checkpoint');"
+ );
+
+ # Close the background session on the publisher node
+ ok($pub_session->quit, "close publisher session");
+
+ # Confirm that the transaction committed
+ $result =
+ $node_B->safe_psql('postgres', 'SELECT * FROM tab WHERE a = 1');
+ is($result, qq(1|2), 'publisher sees the new row');
+
+ # Ensure the UPDATE is replayed on subscriber
+ $node_B->wait_for_catchup($subname_AB);
+
+ $logfile = slurp_file($node_A->logfile(), $log_location);
+ ok( $logfile =~
+ qr/conflict detected on relation "public.tab": conflict=update_deleted.*
+.*DETAIL:.* The row to be updated was deleted locally in transaction [0-9]+ at .*
+.*Remote row \(1, 2\); replica identity full \(1, 1\)/,
+ 'update target row was deleted in tab');
+
+ # Remember the next transaction ID to be assigned
+ $next_xid =
+ $node_A->safe_psql('postgres', "SELECT txid_current() + 1;");
+
+ # Confirm that the xmin value is advanced to the latest nextXid after the
+ # prepared transaction on the publisher has been committed.
+ ok( $node_A->poll_query_until(
+ 'postgres',
+ "SELECT xmin = $next_xid from pg_replication_slots WHERE slot_name = 'pg_conflict_detection'"
+ ),
+ "the xmin value of slot 'pg_conflict_detection' is updated on subscriber"
+ );
+
+ # Confirm that the dead tuple can be removed now
+ ($cmdret, $stdout, $stderr) =
+ $node_A->psql('postgres', qq(VACUUM (verbose) public.tab;));
+
+ ok($stderr =~ qr/1 removed, 0 remain, 0 are dead but not yet removable/,
+ 'the deleted row is removed');
+
+ # Get the commit timestamp for the publisher's update
+ my $pub_ts = $node_B->safe_psql('postgres',
+ "SELECT pg_xact_commit_timestamp(xmin) from tab where a=1;");
+
+ # Check that the commit timestamp for the update on the publisher is later than
+ # or equal to the timestamp of the local deletion, as the commit timestamp
+ # should be assigned after marking the DELAY_CHKPT_IN_COMMIT flag.
+ $result = $node_B->safe_psql('postgres',
+ "SELECT '$pub_ts'::timestamp >= '$sub_ts'::timestamp");
+ is($result, qq(t),
+ "pub UPDATE's timestamp is later than that of sub's DELETE");
+
+ # Re-enable the subscription for further tests
+ $node_B->psql('postgres', "ALTER SUBSCRIPTION $subname_BA ENABLE;");
+}
+
+###############################################################################
# Check that dead tuple retention stops due to the wait time surpassing
# max_retention_duration.
###############################################################################