diff options
Diffstat (limited to 'src/backend/access/nbtree/nbtxlog.c')
-rw-r--r-- | src/backend/access/nbtree/nbtxlog.c | 41 |
1 files changed, 30 insertions, 11 deletions
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 5fbb481a4fe..f0fe8a3892e 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -489,28 +489,47 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record) BTPageOpaque opaque; /* - * If queries might be active then we need to ensure every block is + * If queries might be active then we need to ensure every leaf page is * unpinned between the lastBlockVacuumed and the current block, if there - * are any. This ensures that every block in the index is touched during - * VACUUM as required to ensure scans work correctly. + * are any. This prevents replay of the VACUUM from reaching the stage of + * removing heap tuples while there could still be indexscans "in flight" + * to those particular tuples (see nbtree/README). + * + * It might be worth checking if there are actually any backends running; + * if not, we could just skip this. + * + * Since VACUUM can visit leaf pages out-of-order, it might issue records + * with lastBlockVacuumed >= block; that's not an error, it just means + * nothing to do now. + * + * Note: since we touch all pages in the range, we will lock non-leaf + * pages, and also any empty (all-zero) pages that may be in the index. It + * doesn't seem worth the complexity to avoid that. But it's important + * that HotStandbyActiveInReplay() will not return true if the database + * isn't yet consistent; so we need not fear reading still-corrupt blocks + * here during crash recovery. */ - if (standbyState == STANDBY_SNAPSHOT_READY && - (xlrec->lastBlockVacuumed + 1) != xlrec->block) + if (HotStandbyActiveInReplay()) { - BlockNumber blkno = xlrec->lastBlockVacuumed + 1; + BlockNumber blkno; - for (; blkno < xlrec->block; blkno++) + for (blkno = xlrec->lastBlockVacuumed + 1; blkno < xlrec->block; blkno++) { /* + * We use RBM_NORMAL_NO_LOG mode because it's not an error + * condition to see all-zero pages. The original btvacuumpage + * scan would have skipped over all-zero pages, noting them in FSM + * but not bothering to initialize them just yet; so we mustn't + * throw an error here. (We could skip acquiring the cleanup lock + * if PageIsNew, but it's probably not worth the cycles to test.) + * * XXX we don't actually need to read the block, we just need to * confirm it is unpinned. If we had a special call into the * buffer manager we could optimise this so that if the block is * not in shared_buffers we confirm it as unpinned. - * - * Another simple optimization would be to check if there's any - * backends running; if not, we could just skip this. */ - buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL); + buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, + RBM_NORMAL_NO_LOG); if (BufferIsValid(buffer)) { LockBufferForCleanup(buffer); |