author     Melanie Plageman <melanieplageman@gmail.com>    2025-10-09 16:25:50 -0400
committer  Melanie Plageman <melanieplageman@gmail.com>    2025-10-09 16:29:01 -0400
commit     d96f87332b3786abd23cba47459546799c562b8c (patch)
tree       4f4fdd37f5047ec9b09dee98679983c6545999df
parent     1b073cba4993b31fbf820504f297efce5d951c00 (diff)
Eliminate COPY FREEZE use of XLOG_HEAP2_VISIBLE
Instead of emitting a separate XLOG_HEAP2_VISIBLE WAL record to set bits
in the visibility map (VM), include the VM block changes in the
XLOG_HEAP2_MULTI_INSERT record itself.
This halves the number of WAL records emitted by COPY FREEZE.
Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
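In outline, the write side now pins and exclusively locks the VM page before entering the critical section, sets PD_ALL_VISIBLE and the VM bits inside it, registers the VM buffer as block 1 of the multi-insert record, and stamps both pages with the same LSN. The following is a condensed sketch distilled from the heapam.c hunks below, not the patch itself; declarations, the tuple-insertion loop, and unrelated logic are omitted:

```c
/* Condensed sketch of the new heap_multi_insert() flow for COPY FREEZE;
 * see the heapam.c hunks below for the real code. */
if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
{
    all_frozen_set = true;
    /* lock the VM page before entering the critical section */
    LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
}

START_CRIT_SECTION();

if (all_frozen_set)
{
    PageSetAllVisible(page);        /* heap page bit first... */
    visibilitymap_set_vmbits(BufferGetBlockNumber(buffer), vmbuffer,
                             VISIBILITYMAP_ALL_VISIBLE |
                             VISIBILITYMAP_ALL_FROZEN,
                             relation->rd_locator);     /* ...then VM bits */
}

if (needwal)
{
    XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
    if (all_frozen_set)
        XLogRegisterBuffer(1, vmbuffer, 0);     /* VM page rides along */

    recptr = XLogInsert(RM_HEAP2_ID, info);
    PageSetLSN(page, recptr);
    if (all_frozen_set)
        PageSetLSN(BufferGetPage(vmbuffer), recptr);    /* same LSN for both */
}

END_CRIT_SECTION();

if (all_frozen_set)
    LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
```

Because the VM change now rides in the multi-insert record, the separate per-page XLOG_HEAP2_VISIBLE record disappears, which is where the halving claimed above comes from.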
src/backend/access/heap/heapam.c        | 43
src/backend/access/heap/heapam_xlog.c   | 49
src/backend/access/heap/visibilitymap.c | 70
src/backend/access/rmgrdesc/heapdesc.c  |  6
src/include/access/visibilitymap.h      |  4

5 files changed, 154 insertions(+), 18 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index ed0c0c2dc9f..568696333c2 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2466,7 +2466,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 		starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
 
 		if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+		{
 			all_frozen_set = true;
+			/* Lock the vmbuffer before entering the critical section */
+			LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+		}
 
 		/* NO EREPORT(ERROR) from here till changes are logged */
 		START_CRIT_SECTION();
@@ -2506,7 +2510,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 		 * going to add further frozen rows to it.
 		 *
 		 * If we're only adding already frozen rows to a previously empty
-		 * page, mark it as all-visible.
+		 * page, mark it as all-frozen and update the visibility map. We're
+		 * already holding a pin on the vmbuffer.
 		 */
 		if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
 		{
@@ -2517,7 +2522,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 								vmbuffer, VISIBILITYMAP_VALID_BITS);
 		}
 		else if (all_frozen_set)
+		{
 			PageSetAllVisible(page);
+			visibilitymap_set_vmbits(BufferGetBlockNumber(buffer),
+									 vmbuffer,
+									 VISIBILITYMAP_ALL_VISIBLE |
+									 VISIBILITYMAP_ALL_FROZEN,
+									 relation->rd_locator);
+		}
 
 		/*
 		 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2565,6 +2577,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			xlrec->flags = 0;
 			if (all_visible_cleared)
 				xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+
+			/*
+			 * We don't have to worry about including a conflict xid in the
+			 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
+			 * visibility rules.
+			 */
 			if (all_frozen_set)
 				xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
 
@@ -2628,6 +2646,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			XLogBeginInsert();
 			XLogRegisterData(xlrec, tupledata - scratch.data);
 			XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+			if (all_frozen_set)
+				XLogRegisterBuffer(1, vmbuffer, 0);
 
 			XLogRegisterBufData(0, tupledata, totaldatalen);
 
@@ -2637,26 +2657,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			recptr = XLogInsert(RM_HEAP2_ID, info);
 
 			PageSetLSN(page, recptr);
+			if (all_frozen_set)
+			{
+				Assert(BufferIsDirty(vmbuffer));
+				PageSetLSN(BufferGetPage(vmbuffer), recptr);
+			}
 		}
 
 		END_CRIT_SECTION();
 
-		/*
-		 * If we've frozen everything on the page, update the visibilitymap.
-		 * We're already holding pin on the vmbuffer.
-		 */
 		if (all_frozen_set)
-		{
-			/*
-			 * It's fine to use InvalidTransactionId here - this is only used
-			 * when HEAP_INSERT_FROZEN is specified, which intentionally
-			 * violates visibility rules.
-			 */
-			visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
-							  InvalidXLogRecPtr, vmbuffer,
-							  InvalidTransactionId,
-							  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
-		}
+			LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
 		UnlockReleaseBuffer(buffer);
 
 		ndone += nthispage;
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index cf843277938..30e339d8fe2 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -551,6 +551,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
 	int			i;
 	bool		isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
 	XLogRedoAction action;
+	Buffer		vmbuffer = InvalidBuffer;
 
 	/*
 	 * Insertion doesn't overwrite MVCC data, so no conflict processing is
@@ -571,11 +572,11 @@ heap_xlog_multi_insert(XLogReaderState *record)
 	if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(rlocator);
-		Buffer		vmbuffer = InvalidBuffer;
 
 		visibilitymap_pin(reln, blkno, &vmbuffer);
 		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
 		ReleaseBuffer(vmbuffer);
+		vmbuffer = InvalidBuffer;
 		FreeFakeRelcacheEntry(reln);
 	}
 
@@ -662,6 +663,52 @@ heap_xlog_multi_insert(XLogReaderState *record)
 
 	if (BufferIsValid(buffer))
 		UnlockReleaseBuffer(buffer);
+	buffer = InvalidBuffer;
+
+	/*
+	 * Read and update the visibility map (VM) block.
+	 *
+	 * We must always redo VM changes, even if the corresponding heap page
+	 * update was skipped due to the LSN interlock. Each VM block covers
+	 * multiple heap pages, so later WAL records may update other bits in the
+	 * same block. If this record includes an FPI (full-page image),
+	 * subsequent WAL records may depend on it to guard against torn pages.
+	 *
+	 * Heap page changes are replayed first to preserve the invariant:
+	 * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
+	 *
+	 * Note that we released the heap page lock above. During normal
+	 * operation, this would be unsafe: a concurrent modification could
+	 * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
+	 * invariant. During recovery, however, no concurrent writers exist.
+	 * Therefore, updating the VM without holding the heap page lock is safe
+	 * enough. The same approach is taken when replaying xl_heap_visible
+	 * records (see heap_xlog_visible()).
+	 */
+	if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
+		XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
+									  &vmbuffer) == BLK_NEEDS_REDO)
+	{
+		Page		vmpage = BufferGetPage(vmbuffer);
+
+		/* initialize the page if it was read as zeros */
+		if (PageIsNew(vmpage))
+			PageInit(vmpage, BLCKSZ, 0);
+
+		visibilitymap_set_vmbits(blkno,
+								 vmbuffer,
+								 VISIBILITYMAP_ALL_VISIBLE |
+								 VISIBILITYMAP_ALL_FROZEN,
+								 rlocator);
+
+		PageSetLSN(vmpage, lsn);
+	}
+
+	if (BufferIsValid(vmbuffer))
+		UnlockReleaseBuffer(vmbuffer);
 
 	/*
 	 * If the page is running low on free space, update the FSM as well.
 	 * Arbitrarily, our definition of "low" is less than 20%. We can't do much
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 0414ce1945c..2f5e61e2392 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -14,7 +14,8 @@
  *		visibilitymap_clear  - clear bits for one page in the visibility map
  *		visibilitymap_pin	 - pin a map page for setting a bit
  *		visibilitymap_pin_ok - check whether correct map page is already pinned
- *		visibilitymap_set	 - set a bit in a previously pinned page
+ *		visibilitymap_set	 - set bit(s) in a previously pinned page and log
+ *		visibilitymap_set_vmbits - set bit(s) in a pinned page
  *		visibilitymap_get_status - get status of bits
  *		visibilitymap_count  - count number of bits set in visibility map
  *		visibilitymap_prepare_truncate -
@@ -323,6 +324,73 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 }
 
 /*
+ * Set VM (visibility map) flags in the VM block in vmBuf.
+ *
+ * This function is intended for callers that log VM changes together
+ * with the heap page modifications that rendered the page all-visible.
+ * Callers that log VM changes separately should use visibilitymap_set().
+ *
+ * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
+ * corresponding to heapBlk.
+ *
+ * In normal operation (not recovery), this must be called inside a critical
+ * section that also applies the necessary heap page changes and, if
+ * applicable, emits WAL.
+ *
+ * The caller is responsible for ensuring consistency between the heap page
+ * and the VM page by holding a pin and exclusive lock on the buffer
+ * containing heapBlk.
+ *
+ * rlocator is used only for debugging messages.
+ */
+uint8
+visibilitymap_set_vmbits(BlockNumber heapBlk,
+						 Buffer vmBuf, uint8 flags,
+						 const RelFileLocator rlocator)
+{
+	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
+	uint8		mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
+	Page		page;
+	uint8	   *map;
+	uint8		status;
+
+#ifdef TRACE_VISIBILITYMAP
+	elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
+		 flags,
+		 relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
+		 heapBlk);
+#endif
+
+	/* Call in same critical section where WAL is emitted. */
+	Assert(InRecovery || CritSectionCount > 0);
+
+	/* Flags should be valid. Also never clear bits with this function. */
+	Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
+
+	/* Must never set all_frozen bit without also setting all_visible bit */
+	Assert(flags != VISIBILITYMAP_ALL_FROZEN);
+
+	/* Check that we have the right VM page pinned */
+	if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
+		elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
+
+	Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
+
+	page = BufferGetPage(vmBuf);
+	map = (uint8 *) PageGetContents(page);
+
+	status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
+	if (flags != status)
+	{
+		map[mapByte] |= (flags << mapOffset);
+		MarkBufferDirty(vmBuf);
+	}
+
+	return status;
+}
+
+/*
  * visibilitymap_get_status - get status of bits
  *
  * Are all tuples on heapBlk visible to all or are marked frozen, according
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 82b62c95de5..056beb24d40 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -16,6 +16,7 @@
 
 #include "access/heapam_xlog.h"
 #include "access/rmgrdesc_utils.h"
+#include "access/visibilitymapdefs.h"
 #include "storage/standbydefs.h"
 
 /*
@@ -354,6 +355,11 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 		appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
 						 xlrec->flags);
 
+		if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+			appendStringInfo(buf, ", vm_flags: 0x%02X",
+							 VISIBILITYMAP_ALL_VISIBLE |
+							 VISIBILITYMAP_ALL_FROZEN);
+
 		if (XLogRecHasBlockData(record, 0) && !isinit)
 		{
 			appendStringInfoString(buf, ", offsets:");
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index be21c6dd1a3..c6fa37be968 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -18,6 +18,7 @@
 #include "access/xlogdefs.h"
 #include "storage/block.h"
 #include "storage/buf.h"
+#include "storage/relfilelocator.h"
 #include "utils/relcache.h"
 
 /* Macros for visibilitymap test */
@@ -37,6 +38,9 @@ extern uint8 visibilitymap_set(Relation rel, BlockNumber heapBlk,
 							   Buffer vmBuf, TransactionId cutoff_xid,
 							   uint8 flags);
+extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
+									  Buffer vmBuf, uint8 flags,
+									  const RelFileLocator rlocator);
 extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk,
 									  Buffer *vmbuf);
 extern void visibilitymap_count(Relation rel, BlockNumber *all_visible,
 								BlockNumber *all_frozen);
 extern BlockNumber visibilitymap_prepare_truncate(Relation rel,
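Since visibilitymap_set_vmbits() leaves all WAL logging to its caller, any future non-recovery call site would have to reproduce the locking discipline spelled out in its header comment above. A hypothetical sketch (not code from this patch; rel, heapblk, heapbuf, and vmbuffer are stand-ins for state set up the way heap_multi_insert() does):

```c
/* Hypothetical caller of visibilitymap_set_vmbits(); illustrates the
 * contract from its header comment, not code from the patch. */
visibilitymap_pin(rel, heapblk, &vmbuffer);     /* pin the covering VM page */
LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);    /* lock before crit section */

START_CRIT_SECTION();

/* heap page (pinned and exclusively locked in heapbuf) goes first */
PageSetAllVisible(BufferGetPage(heapbuf));
visibilitymap_set_vmbits(heapblk, vmbuffer,
						 VISIBILITYMAP_ALL_VISIBLE,
						 rel->rd_locator);

/* the caller's WAL record must register vmbuffer and stamp its LSN,
 * as heap_multi_insert() does with XLogRegisterBuffer(1, vmbuffer, 0);
 * that plumbing is elided here */

END_CRIT_SECTION();

LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
```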