diff options
Diffstat (limited to 'src/backend')
21 files changed, 220 insertions, 30 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e3e7307ef5f..4c5ae205a7a 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2647,9 +2647,6 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, */ if (all_frozen_set) { - Assert(PageIsAllVisible(page)); - Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer)); - /* * It's fine to use InvalidTransactionId here - this is only used * when HEAP_INSERT_FROZEN is specified, which intentionally diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index 5d48f071f53..cf843277938 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -295,7 +295,6 @@ heap_xlog_visible(XLogReaderState *record) LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK); reln = CreateFakeRelcacheEntry(rlocator); - visibilitymap_pin(reln, blkno, &vmbuffer); visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer, xlrec->snapshotConflictHorizon, vmbits); diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 932701d8420..981d9380a92 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -2121,8 +2121,11 @@ lazy_scan_prune(LVRelState *vacrel, else if (all_visible_according_to_vm && !PageIsAllVisible(page) && visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0) { - elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", - vacrel->relname, blkno); + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", + vacrel->relname, blkno))); + visibilitymap_clear(vacrel->rel, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); } @@ -2143,8 +2146,11 @@ lazy_scan_prune(LVRelState *vacrel, */ else if (presult.lpdead_items > 0 && PageIsAllVisible(page)) { - elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", - vacrel->relname, blkno); + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", + vacrel->relname, blkno))); + PageClearAllVisible(page); MarkBufferDirty(buf); visibilitymap_clear(vacrel->rel, blkno, vmbuffer, diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 7918176fc58..d8e2fce2c99 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -103,6 +103,7 @@ #include "storage/proc.h" #include "storage/procarray.h" #include "utils/builtins.h" +#include "utils/injection_point.h" #include "utils/memutils.h" #include "utils/timestamp.h" @@ -2332,12 +2333,17 @@ RecordTransactionCommitPrepared(TransactionId xid, replorigin = (replorigin_session_origin != InvalidRepOriginId && replorigin_session_origin != DoNotReplicateId); + /* Load the injection point before entering the critical section */ + INJECTION_POINT_LOAD("commit-after-delay-checkpoint"); + START_CRIT_SECTION(); /* See notes in RecordTransactionCommit */ Assert((MyProc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0); MyProc->delayChkptFlags |= DELAY_CHKPT_IN_COMMIT; + INJECTION_POINT_CACHED("commit-after-delay-checkpoint", NULL); + /* * Ensures the DELAY_CHKPT_IN_COMMIT flag write is globally visible before * commit time is written. @@ -2809,3 +2815,58 @@ LookupGXactBySubid(Oid subid) return found; } + +/* + * TwoPhaseGetXidByLockingProc + * Return the oldest transaction ID from prepared transactions that are + * currently in the commit critical section. + * + * This function only considers transactions in the currently connected + * database. If no matching transactions are found, it returns + * InvalidTransactionId. + */ +TransactionId +TwoPhaseGetOldestXidInCommit(void) +{ + TransactionId oldestRunningXid = InvalidTransactionId; + + LWLockAcquire(TwoPhaseStateLock, LW_SHARED); + + for (int i = 0; i < TwoPhaseState->numPrepXacts; i++) + { + GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC *commitproc; + TransactionId xid; + + if (!gxact->valid) + continue; + + if (gxact->locking_backend == INVALID_PROC_NUMBER) + continue; + + /* + * Get the backend that is handling the transaction. It's safe to + * access this backend while holding TwoPhaseStateLock, as the backend + * can only be destroyed after either removing or unlocking the + * current global transaction, both of which require an exclusive + * TwoPhaseStateLock. + */ + commitproc = GetPGProcByNumber(gxact->locking_backend); + + if (MyDatabaseId != commitproc->databaseId) + continue; + + if ((commitproc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0) + continue; + + xid = XidFromFullTransactionId(gxact->fxid); + + if (!TransactionIdIsValid(oldestRunningXid) || + TransactionIdPrecedes(xid, oldestRunningXid)) + oldestRunningXid = xid; + } + + LWLockRelease(TwoPhaseStateLock); + + return oldestRunningXid; +} diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 82cf65fae73..750d262fcca 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -854,7 +854,17 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, pgstat_create_subscription(subid); - if (opts.enabled) + /* + * Notify the launcher to start the apply worker if the subscription is + * enabled, or to create the conflict detection slot if retain_dead_tuples + * is enabled. + * + * Creating the conflict detection slot is essential even when the + * subscription is not enabled. This ensures that dead tuples are + * retained, which is necessary for accurately identifying the type of + * conflict during replication. + */ + if (opts.enabled || opts.retaindeadtuples) ApplyLauncherWakeupAtCommit(); ObjectAddressSet(myself, SubscriptionRelationId, subid); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 082a3575d62..3be2e051d32 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -42,6 +42,7 @@ #include "catalog/pg_foreign_table.h" #include "catalog/pg_inherits.h" #include "catalog/pg_largeobject.h" +#include "catalog/pg_largeobject_metadata.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" #include "catalog/pg_policy.h" @@ -2389,12 +2390,15 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple) /* * Most system catalogs can't be truncated at all, or at least not unless * allow_system_table_mods=on. As an exception, however, we allow - * pg_largeobject to be truncated as part of pg_upgrade, because we need - * to change its relfilenode to match the old cluster, and allowing a - * TRUNCATE command to be executed is the easiest way of doing that. + * pg_largeobject and pg_largeobject_metadata to be truncated as part of + * pg_upgrade, because we need to change its relfilenode to match the old + * cluster, and allowing a TRUNCATE command to be executed is the easiest + * way of doing that. */ if (!allowSystemTableMods && IsSystemClass(relid, reltuple) - && (!IsBinaryUpgrade || relid != LargeObjectRelationId)) + && (!IsBinaryUpgrade || + (relid != LargeObjectRelationId && + relid != LargeObjectMetadataRelationId))) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied: \"%s\" is a system catalog", diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 97455b1ed4a..630d708d2a3 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -2483,7 +2483,7 @@ tlist_coercion_finished: rte = makeNode(RangeTblEntry); rte->rtekind = RTE_SUBQUERY; rte->subquery = parse; - rte->eref = rte->alias = makeAlias("*SELECT*", colnames); + rte->eref = makeAlias("unnamed_subquery", colnames); rte->lateral = false; rte->inh = false; rte->inFromCl = true; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index d71ed958e31..fae18548e07 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1397,7 +1397,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, */ nsitem = addRangeTableEntryForSubquery(pstate, subselect, - makeAlias("ANY_subquery", NIL), + NULL, use_lateral, false); rte = nsitem->p_rte; diff --git a/src/backend/parser/README b/src/backend/parser/README index e0c986a41ef..e26eb437a9f 100644 --- a/src/backend/parser/README +++ b/src/backend/parser/README @@ -20,6 +20,7 @@ parse_cte.c handle Common Table Expressions (WITH clauses) parse_expr.c handle expressions like col, col + 3, x = 3 or x = 4 parse_enr.c handle ephemeral named rels (trigger transition tables, ...) parse_func.c handle functions, table.column and column identifiers +parse_jsontable.c handle JSON_TABLE parse_merge.c handle MERGE parse_node.c create nodes for various structures parse_oper.c handle operators in expressions diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 34f7c17f576..b9763ea1714 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -777,7 +777,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) */ nsitem = addRangeTableEntryForSubquery(pstate, selectQuery, - makeAlias("*SELECT*", NIL), + NULL, false, false); addNSItemToQuery(pstate, nsitem, true, false, false); @@ -2100,7 +2100,6 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, { /* Process leaf SELECT */ Query *selectQuery; - char selectName[32]; ParseNamespaceItem *nsitem; RangeTblRef *rtr; ListCell *tl; @@ -2156,11 +2155,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, /* * Make the leaf query be a subquery in the top-level rangetable. */ - snprintf(selectName, sizeof(selectName), "*SELECT* %d", - list_length(pstate->p_rtable) + 1); nsitem = addRangeTableEntryForSubquery(pstate, selectQuery, - makeAlias(selectName, NIL), + NULL, false, false); diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index d3356bc84ee..e6da4028d39 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -1789,6 +1789,32 @@ AllTablesyncsReady(void) } /* + * Return whether the subscription currently has any relations. + * + * Note: Unlike HasSubscriptionRelations(), this function relies on cached + * information for subscription relations. Additionally, it should not be + * invoked outside of apply or tablesync workers, as MySubscription must be + * initialized first. + */ +bool +HasSubscriptionRelationsCached(void) +{ + bool started_tx; + bool has_subrels; + + /* We need up-to-date subscription tables info here */ + has_subrels = FetchTableStates(&started_tx); + + if (started_tx) + { + CommitTransactionCommand(); + pgstat_report_stat(true); + } + + return has_subrels; +} + +/* * Update the two_phase state of the specified subscription in pg_subscription. */ void diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index f1ebd63e792..c0f6bef5c28 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -4595,11 +4595,28 @@ wait_for_local_flush(RetainDeadTuplesData *rdt_data) * workers is complex and not worth the effort, so we simply return if not * all tables are in the READY state. * - * It is safe to add new tables with initial states to the subscription - * after this check because any changes applied to these tables should - * have a WAL position greater than the rdt_data->remote_lsn. + * Advancing the transaction ID is necessary even when no tables are + * currently subscribed, to avoid retaining dead tuples unnecessarily. + * While it might seem safe to skip all phases and directly assign + * candidate_xid to oldest_nonremovable_xid during the + * RDT_GET_CANDIDATE_XID phase in such cases, this is unsafe. If users + * concurrently add tables to the subscription, the apply worker may not + * process invalidations in time. Consequently, + * HasSubscriptionRelationsCached() might miss the new tables, leading to + * premature advancement of oldest_nonremovable_xid. + * + * Performing the check during RDT_WAIT_FOR_LOCAL_FLUSH is safe, as + * invalidations are guaranteed to be processed before applying changes + * from newly added tables while waiting for the local flush to reach + * remote_lsn. + * + * Additionally, even if we check for subscription tables during + * RDT_GET_CANDIDATE_XID, they might be dropped before reaching + * RDT_WAIT_FOR_LOCAL_FLUSH. Therefore, it's still necessary to verify + * subscription tables at this stage to prevent unnecessary tuple + * retention. */ - if (!AllTablesyncsReady()) + if (HasSubscriptionRelationsCached() && !AllTablesyncsReady()) { TimestampTz now; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index e3dce9dc68d..59822f22b8d 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -51,6 +51,7 @@ #include "access/timeline.h" #include "access/transam.h" +#include "access/twophase.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "access/xlogreader.h" @@ -2719,6 +2720,7 @@ ProcessStandbyPSRequestMessage(void) { XLogRecPtr lsn = InvalidXLogRecPtr; TransactionId oldestXidInCommit; + TransactionId oldestGXidInCommit; FullTransactionId nextFullXid; FullTransactionId fullOldestXidInCommit; WalSnd *walsnd = MyWalSnd; @@ -2746,6 +2748,16 @@ ProcessStandbyPSRequestMessage(void) * ones replicated. */ oldestXidInCommit = GetOldestActiveTransactionId(true, false); + oldestGXidInCommit = TwoPhaseGetOldestXidInCommit(); + + /* + * Update the oldest xid for standby transmission if an older prepared + * transaction exists and is currently in commit phase. + */ + if (TransactionIdIsValid(oldestGXidInCommit) && + TransactionIdPrecedes(oldestGXidInCommit, oldestXidInCommit)) + oldestXidInCommit = oldestGXidInCommit; + nextFullXid = ReadNextFullTransactionId(); fullOldestXidInCommit = FullTransactionIdFromAllowableAt(nextFullXid, oldestXidInCommit); diff --git a/src/backend/rewrite/rewriteSearchCycle.c b/src/backend/rewrite/rewriteSearchCycle.c index 9f95d4dc1b0..5202ef43d10 100644 --- a/src/backend/rewrite/rewriteSearchCycle.c +++ b/src/backend/rewrite/rewriteSearchCycle.c @@ -282,8 +282,8 @@ rewriteSearchAndCycle(CommonTableExpr *cte) newrte = makeNode(RangeTblEntry); newrte->rtekind = RTE_SUBQUERY; - newrte->alias = makeAlias("*TLOCRN*", cte->ctecolnames); - newrte->eref = newrte->alias; + newrte->alias = NULL; + newrte->eref = makeAlias("*TLOCRN*", cte->ctecolnames); newsubquery = copyObject(rte1->subquery); IncrementVarSublevelsUp((Node *) newsubquery, 1, 1); newrte->subquery = newsubquery; @@ -379,8 +379,8 @@ rewriteSearchAndCycle(CommonTableExpr *cte) ewcl = lappend(ewcl, makeString(cte->cycle_clause->cycle_mark_column)); ewcl = lappend(ewcl, makeString(cte->cycle_clause->cycle_path_column)); } - newrte->alias = makeAlias("*TROCRN*", ewcl); - newrte->eref = newrte->alias; + newrte->alias = NULL; + newrte->eref = makeAlias("*TROCRN*", ewcl); /* * Find the reference to the recursive CTE in the right UNION subquery's diff --git a/src/backend/storage/page/meson.build b/src/backend/storage/page/meson.build index c3e4a805862..112f00ff365 100644 --- a/src/backend/storage/page/meson.build +++ b/src/backend/storage/page/meson.build @@ -1,7 +1,15 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group +checksum_backend_lib = static_library('checksum_backend_lib', + 'checksum.c', + dependencies: backend_build_deps, + kwargs: internal_lib_args, + c_args: vectorize_cflags + unroll_loops_cflags, +) + +backend_link_with += checksum_backend_lib + backend_sources += files( 'bufpage.c', - 'checksum.c', 'itemptr.c', ) diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 0cecd464902..d356830f756 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -649,6 +649,10 @@ pg_parse_query(const char *query_string) TRACE_POSTGRESQL_QUERY_PARSE_DONE(query_string); + if (Debug_print_raw_parse) + elog_node_display(LOG, "raw parse tree", raw_parsetree_list, + Debug_pretty_print); + return raw_parsetree_list; } @@ -3697,7 +3701,10 @@ set_debug_options(int debug_flag, GucContext context, GucSource source) if (debug_flag >= 2) SetConfigOption("log_statement", "all", context, source); if (debug_flag >= 3) + { + SetConfigOption("debug_print_raw_parse", "true", context, source); SetConfigOption("debug_print_parse", "true", context, source); + } if (debug_flag >= 4) SetConfigOption("debug_print_plan", "true", context, source); if (debug_flag >= 5) diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index ffb5b8cce34..f8e91484e36 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -1975,6 +1975,17 @@ pgstat_read_statsfile(void) header = pgstat_init_entry(key.kind, p); dshash_release_lock(pgStatLocal.shared_hash, p); + if (header == NULL) + { + /* + * It would be tempting to switch this ERROR to a + * WARNING, but it would mean that all the statistics + * are discarded when the environment fails on OOM. + */ + elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c", + key.kind, key.dboid, + key.objid, t); + } if (!read_chunk(fpin, pgstat_get_entry_data(key.kind, header), diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c index 62de3474453..9dc3212f7dd 100644 --- a/src/backend/utils/activity/pgstat_shmem.c +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -289,6 +289,13 @@ pgstat_detach_shmem(void) * ------------------------------------------------------------ */ +/* + * Initialize entry newly-created. + * + * Returns NULL in the event of an allocation failure, so as callers can + * take cleanup actions as the entry initialized is already inserted in the + * shared hashtable. + */ PgStatShared_Common * pgstat_init_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent) @@ -311,7 +318,12 @@ pgstat_init_entry(PgStat_Kind kind, pg_atomic_init_u32(&shhashent->generation, 0); shhashent->dropped = false; - chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size); + chunk = dsa_allocate_extended(pgStatLocal.dsa, + pgstat_get_kind_info(kind)->shared_size, + DSA_ALLOC_ZERO | DSA_ALLOC_NO_OOM); + if (chunk == InvalidDsaPointer) + return NULL; + shheader = dsa_get_address(pgStatLocal.dsa, chunk); shheader->magic = 0xdeadbeef; @@ -509,6 +521,20 @@ pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create, if (!shfound) { shheader = pgstat_init_entry(kind, shhashent); + if (shheader == NULL) + { + /* + * Failed the allocation of a new entry, so clean up the + * shared hashtable before giving up. + */ + dshash_delete_entry(pgStatLocal.shared_hash, shhashent); + + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("Failed while allocating entry %u/%u/%" PRIu64 ".", + key.kind, key.dboid, key.objid))); + } pgstat_acquire_entry_ref(entry_ref, shhashent, shheader); if (created_entry != NULL) diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index a157cec3c4d..0da01627cfe 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -414,6 +414,12 @@ ifdef => 'DEBUG_NODE_TESTS_ENABLED', }, +{ name => 'debug_print_raw_parse', type => 'bool', context => 'PGC_USERSET', group => 'LOGGING_WHAT', + short_desc => 'Logs each query\'s raw parse tree.', + variable => 'Debug_print_raw_parse', + boot_val => 'false', +}, + { name => 'debug_print_parse', type => 'bool', context => 'PGC_USERSET', group => 'LOGGING_WHAT', short_desc => 'Logs each query\'s parse tree.', variable => 'Debug_print_parse', diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 787933a9e5a..00c8376cf4d 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -507,6 +507,7 @@ bool AllowAlterSystem = true; bool log_duration = false; bool Debug_print_plan = false; bool Debug_print_parse = false; +bool Debug_print_raw_parse = false; bool Debug_print_rewritten = false; bool Debug_pretty_print = true; diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index a9d8293474a..26c08693564 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -581,6 +581,7 @@ # - What to Log - +#debug_print_raw_parse = off #debug_print_parse = off #debug_print_rewritten = off #debug_print_plan = off |