author	Melanie Plageman <melanieplageman@gmail.com>	2025-10-09 16:25:50 -0400
committer	Melanie Plageman <melanieplageman@gmail.com>	2025-10-09 16:29:01 -0400
commit	d96f87332b3786abd23cba47459546799c562b8c (patch)
tree	4f4fdd37f5047ec9b09dee98679983c6545999df
parent	1b073cba4993b31fbf820504f297efce5d951c00 (diff)
Eliminate COPY FREEZE use of XLOG_HEAP2_VISIBLE
Instead of emitting a separate WAL XLOG_HEAP2_VISIBLE record for setting
bits in the VM, specify the VM block changes in the XLOG_HEAP2_MULTI_INSERT
record. This halves the number of WAL records emitted by COPY FREEZE.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
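For orientation, the write-side change reduces to roughly the following condensed sketch (variable names as in heap_multi_insert(); tuple insertion, the XLogRegisterData() calls, and error paths are omitted, so this is illustrative rather than the literal patch):

    /* VM buffer must be locked before entering the critical section */
    LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
    START_CRIT_SECTION();

    /* heap page change first, preserving the PD_ALL_VISIBLE invariant */
    PageSetAllVisible(page);
    visibilitymap_set_vmbits(BufferGetBlockNumber(buffer), vmbuffer,
                             VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN,
                             relation->rd_locator);

    XLogBeginInsert();
    XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
    XLogRegisterBuffer(1, vmbuffer, 0);   /* VM block rides along in the record */
    recptr = XLogInsert(RM_HEAP2_ID, info);

    /* one record, one LSN, stamped on both pages */
    PageSetLSN(page, recptr);
    PageSetLSN(BufferGetPage(vmbuffer), recptr);

    END_CRIT_SECTION();
    LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);

Previously the VM bit was set after the critical section by visibilitymap_set(), which emitted a second WAL record (XLOG_HEAP2_VISIBLE) for every page filled by COPY FREEZE.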
-rw-r--r--	src/backend/access/heap/heapam.c	|  43
-rw-r--r--	src/backend/access/heap/heapam_xlog.c	|  49
-rw-r--r--	src/backend/access/heap/visibilitymap.c	|  70
-rw-r--r--	src/backend/access/rmgrdesc/heapdesc.c	|   6
-rw-r--r--	src/include/access/visibilitymap.h	|   4
5 files changed, 154 insertions(+), 18 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index ed0c0c2dc9f..568696333c2 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2466,7 +2466,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+ {
all_frozen_set = true;
+ /* Lock the vmbuffer before entering the critical section */
+ LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+ }
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -2506,7 +2510,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
* going to add further frozen rows to it.
*
* If we're only adding already frozen rows to a previously empty
- * page, mark it as all-visible.
+ * page, mark it as all-frozen and update the visibility map. We're
+ * already holding a pin on the vmbuffer.
*/
if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
{
@@ -2517,7 +2522,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
vmbuffer, VISIBILITYMAP_VALID_BITS);
}
else if (all_frozen_set)
+ {
PageSetAllVisible(page);
+ visibilitymap_set_vmbits(BufferGetBlockNumber(buffer),
+ vmbuffer,
+ VISIBILITYMAP_ALL_VISIBLE |
+ VISIBILITYMAP_ALL_FROZEN,
+ relation->rd_locator);
+ }
/*
* XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2565,6 +2577,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
xlrec->flags = 0;
if (all_visible_cleared)
xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+
+ /*
+ * We don't have to worry about including a conflict xid in the
+ * WAL record, as HEAP_INSERT_FROZEN intentionally violates
+ * visibility rules.
+ */
if (all_frozen_set)
xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
@@ -2628,6 +2646,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
XLogBeginInsert();
XLogRegisterData(xlrec, tupledata - scratch.data);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+ if (all_frozen_set)
+ XLogRegisterBuffer(1, vmbuffer, 0);
XLogRegisterBufData(0, tupledata, totaldatalen);
@@ -2637,26 +2657,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
recptr = XLogInsert(RM_HEAP2_ID, info);
PageSetLSN(page, recptr);
+ if (all_frozen_set)
+ {
+ Assert(BufferIsDirty(vmbuffer));
+ PageSetLSN(BufferGetPage(vmbuffer), recptr);
+ }
}
END_CRIT_SECTION();
- /*
- * If we've frozen everything on the page, update the visibilitymap.
- * We're already holding pin on the vmbuffer.
- */
if (all_frozen_set)
- {
- /*
- * It's fine to use InvalidTransactionId here - this is only used
- * when HEAP_INSERT_FROZEN is specified, which intentionally
- * violates visibility rules.
- */
- visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
- InvalidXLogRecPtr, vmbuffer,
- InvalidTransactionId,
- VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
- }
+ LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
UnlockReleaseBuffer(buffer);
ndone += nthispage;
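For contrast, the visibilitymap_set() call removed above emitted its own WAL record. A simplified sketch of that old path (condensed from visibilitymap_set() in visibilitymap.c; the recptr bookkeeping and heap-page LSN handling are elided):

    LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
    if (flags != (map[mapByte] >> mapOffset & VISIBILITYMAP_VALID_BITS))
    {
        START_CRIT_SECTION();
        map[mapByte] |= (flags << mapOffset);
        MarkBufferDirty(vmBuf);
        if (RelationNeedsWAL(rel) && XLogRecPtrIsInvalid(recptr))
            recptr = log_heap_visible(rel, heapBuf, vmBuf,   /* the extra record: */
                                      cutoff_xid, flags);    /* XLOG_HEAP2_VISIBLE */
        PageSetLSN(BufferGetPage(vmBuf), recptr);
        END_CRIT_SECTION();
    }
    LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);

With the new code, the VM change is covered by the XLOG_HEAP2_MULTI_INSERT record itself, so this second record and its separate critical section disappear from the COPY FREEZE path.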
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index cf843277938..30e339d8fe2 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -551,6 +551,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
int i;
bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
XLogRedoAction action;
+ Buffer vmbuffer = InvalidBuffer;
/*
* Insertion doesn't overwrite MVCC data, so no conflict processing is
@@ -571,11 +572,11 @@ heap_xlog_multi_insert(XLogReaderState *record)
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(rlocator);
- Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer);
visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
ReleaseBuffer(vmbuffer);
+ vmbuffer = InvalidBuffer;
FreeFakeRelcacheEntry(reln);
}
@@ -662,6 +663,52 @@ heap_xlog_multi_insert(XLogReaderState *record)
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
+ buffer = InvalidBuffer;
+
+ /*
+ * Read and update the visibility map (VM) block.
+ *
+ * We must always redo VM changes, even if the corresponding heap page
+ * update was skipped due to the LSN interlock. Each VM block covers
+ * multiple heap pages, so later WAL records may update other bits in the
+ * same block. If this record includes an FPI (full-page image),
+ * subsequent WAL records may depend on it to guard against torn pages.
+ *
+ * Heap page changes are replayed first to preserve the invariant:
+ * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
+ *
+ * Note that we released the heap page lock above. During normal
+ * operation, this would be unsafe — a concurrent modification could
+ * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
+ * invariant.
+ *
+ * During recovery, however, no concurrent writers exist. Therefore,
+ * updating the VM without holding the heap page lock is safe enough. This
+ * same approach is taken when replaying xl_heap_visible records (see
+ * heap_xlog_visible()).
+ */
+ if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
+ XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
+ &vmbuffer) == BLK_NEEDS_REDO)
+ {
+ Page vmpage = BufferGetPage(vmbuffer);
+
+ /* initialize the page if it was read as zeros */
+ if (PageIsNew(vmpage))
+ PageInit(vmpage, BLCKSZ, 0);
+
+ visibilitymap_set_vmbits(blkno,
+ vmbuffer,
+ VISIBILITYMAP_ALL_VISIBLE |
+ VISIBILITYMAP_ALL_FROZEN,
+ rlocator);
+
+ PageSetLSN(vmpage, lsn);
+ }
+
+ if (BufferIsValid(vmbuffer))
+ UnlockReleaseBuffer(vmbuffer);
+
/*
* If the page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do much
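The "LSN interlock" mentioned in the comment above is the standard per-block redo guard. In essence, XLogReadBufferForRedoExtended() decides as follows (simplified sketch; full-page-image restore and zero-page handling omitted):

    /* sketch of the decision inside XLogReadBufferForRedoExtended() */
    if (lsn <= PageGetLSN(BufferGetPage(*buf)))
        return BLK_DONE;        /* page already reflects this record; skip redo */
    else
        return BLK_NEEDS_REDO;  /* caller applies the change, then PageSetLSN() */

Because one VM page covers many heap pages, the heap block and the VM block of the same record can pass this test independently; that is why the VM redo above is performed even when the heap-page update was skipped.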
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 0414ce1945c..2f5e61e2392 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -14,7 +14,8 @@
* visibilitymap_clear - clear bits for one page in the visibility map
* visibilitymap_pin - pin a map page for setting a bit
* visibilitymap_pin_ok - check whether correct map page is already pinned
- * visibilitymap_set - set a bit in a previously pinned page
+ * visibilitymap_set - set bit(s) in a previously pinned page and log
+ * visibilitymap_set_vmbits - set bit(s) in a pinned page
* visibilitymap_get_status - get status of bits
* visibilitymap_count - count number of bits set in visibility map
* visibilitymap_prepare_truncate -
@@ -323,6 +324,73 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
}
/*
+ * Set VM (visibility map) flags in the VM block in vmBuf.
+ *
+ * This function is intended for callers that log VM changes together
+ * with the heap page modifications that rendered the page all-visible.
+ * Callers that log VM changes separately should use visibilitymap_set().
+ *
+ * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
+ * corresponding to heapBlk.
+ *
+ * In normal operation (not recovery), this must be called inside a critical
+ * section that also applies the necessary heap page changes and, if
+ * applicable, emits WAL.
+ *
+ * The caller is responsible for ensuring consistency between the heap page
+ * and the VM page by holding a pin and exclusive lock on the buffer
+ * containing heapBlk.
+ *
+ * rlocator is used only for debugging messages.
+ */
+uint8
+visibilitymap_set_vmbits(BlockNumber heapBlk,
+ Buffer vmBuf, uint8 flags,
+ const RelFileLocator rlocator)
+{
+ BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+ uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
+ uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
+ Page page;
+ uint8 *map;
+ uint8 status;
+
+#ifdef TRACE_VISIBILITYMAP
+ elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
+ flags,
+ relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
+ heapBlk);
+#endif
+
+ /* Call in same critical section where WAL is emitted. */
+ Assert(InRecovery || CritSectionCount > 0);
+
+ /* Flags should be valid. Also never clear bits with this function */
+ Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
+
+ /* Must never set all_frozen bit without also setting all_visible bit */
+ Assert(flags != VISIBILITYMAP_ALL_FROZEN);
+
+ /* Check that we have the right VM page pinned */
+ if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
+ elog(ERROR, "wrong VM buffer passed to visibilitymap_set_vmbits");
+
+ Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
+
+ page = BufferGetPage(vmBuf);
+ map = (uint8 *) PageGetContents(page);
+
+ status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
+ if (flags != status)
+ {
+ map[mapByte] |= (flags << mapOffset);
+ MarkBufferDirty(vmBuf);
+ }
+
+ return status;
+}
+
+/*
* visibilitymap_get_status - get status of bits
*
* Are all tuples on heapBlk visible to all or are marked frozen, according
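The mapBlock/mapByte/mapOffset arithmetic in the new function works because each heap block is represented by two bits (BITS_PER_HEAPBLOCK) in the map. These are, in essence, the macros defined at the top of visibilitymap.c:

    #define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))

    /* Number of heap blocks we can represent in one byte */
    #define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)

    /* Number of heap blocks we can represent in one visibility map page */
    #define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)

    /* Mapping from heap block number to the right bit in the visibility map */
    #define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
    #define HEAPBLK_TO_MAPBYTE(x)  (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
    #define HEAPBLK_TO_OFFSET(x)   (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)

One VM block therefore covers many heap blocks, which is what makes the always-redo rule in heap_xlog_multi_insert() necessary.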
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 82b62c95de5..056beb24d40 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -16,6 +16,7 @@
#include "access/heapam_xlog.h"
#include "access/rmgrdesc_utils.h"
+#include "access/visibilitymapdefs.h"
#include "storage/standbydefs.h"
/*
@@ -354,6 +355,11 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
xlrec->flags);
+ if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+ appendStringInfo(buf, ", vm_flags: 0x%02X",
+ VISIBILITYMAP_ALL_VISIBLE |
+ VISIBILITYMAP_ALL_FROZEN);
+
if (XLogRecHasBlockData(record, 0) && !isinit)
{
appendStringInfoString(buf, ", offsets:");
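With this heap2_desc() change, a pg_waldump line for a COPY FREEZE batch might look roughly like the following (record shape and counts are illustrative; the vm_flags value 0x03 is VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN as printed by the code above):

    rmgr: Heap2 ... desc: MULTI_INSERT+INIT ntuples: 100, flags: 0x20, vm_flags: 0x03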
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index be21c6dd1a3..c6fa37be968 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -18,6 +18,7 @@
#include "access/xlogdefs.h"
#include "storage/block.h"
#include "storage/buf.h"
+#include "storage/relfilelocator.h"
#include "utils/relcache.h"
/* Macros for visibilitymap test */
@@ -37,6 +38,9 @@ extern uint8 visibilitymap_set(Relation rel,
Buffer vmBuf,
TransactionId cutoff_xid,
uint8 flags);
+extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
+ Buffer vmBuf, uint8 flags,
+ const RelFileLocator rlocator);
extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
extern BlockNumber visibilitymap_prepare_truncate(Relation rel,