path: root/src/backend/access/heap/pruneheap.c
Diffstat (limited to 'src/backend/access/heap/pruneheap.c')
-rw-r--r--	src/backend/access/heap/pruneheap.c	407
1 file changed, 344 insertions(+), 63 deletions(-)
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 69332b0d25c..4e58c2c2ff4 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -153,7 +153,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
* that during on-access pruning with the current implementation.
*/
heap_page_prune(relation, buffer, vistest, false,
- &presult, NULL);
+ &presult, PRUNE_ON_ACCESS, NULL);
/*
* Report the number of tuples reclaimed to pgstats. This is
@@ -204,6 +204,9 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
* tuples removed and the number of line pointers newly marked LP_DEAD.
* heap_page_prune() is responsible for initializing it.
*
+ * reason indicates why the pruning is performed. It is included in the WAL
+ * record for debugging and analysis purposes, but otherwise has no effect.
+ *
* off_loc is the offset location required by the caller to use in error
* callback.
*/
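For reference, the new 'reason' parameter's type is an enum declared in a header outside this diff. The following sketch is consistent with the switch statement near the end of this patch; the exact declaration (in access/heapam.h) may differ:

typedef enum
{
	PRUNE_ON_ACCESS,			/* on-access pruning */
	PRUNE_VACUUM_SCAN,			/* VACUUM's first heap pass */
	PRUNE_VACUUM_CLEANUP,		/* VACUUM's second heap pass */
} PruneReason;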
@@ -212,6 +215,7 @@ heap_page_prune(Relation relation, Buffer buffer,
GlobalVisState *vistest,
bool mark_unused_now,
PruneResult *presult,
+ PruneReason reason,
OffsetNumber *off_loc)
{
Page page = BufferGetPage(buffer);
@@ -338,7 +342,7 @@ heap_page_prune(Relation relation, Buffer buffer,
* Apply the planned item changes, then repair page fragmentation, and
* update the page's hint bit about whether it has free line pointers.
*/
- heap_page_prune_execute(buffer,
+ heap_page_prune_execute(buffer, false,
prstate.redirected, prstate.nredirected,
prstate.nowdead, prstate.ndead,
prstate.nowunused, prstate.nunused);
@@ -359,44 +363,17 @@ heap_page_prune(Relation relation, Buffer buffer,
MarkBufferDirty(buffer);
/*
- * Emit a WAL XLOG_HEAP2_PRUNE record showing what we did
+ * Emit an XLOG_HEAP2_PRUNE_FREEZE WAL record showing what we did
*/
if (RelationNeedsWAL(relation))
{
- xl_heap_prune xlrec;
- XLogRecPtr recptr;
-
- xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(relation);
- xlrec.snapshotConflictHorizon = prstate.snapshotConflictHorizon;
- xlrec.nredirected = prstate.nredirected;
- xlrec.ndead = prstate.ndead;
-
- XLogBeginInsert();
- XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
-
- XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
-
- /*
- * The OffsetNumber arrays are not actually in the buffer, but we
- * pretend that they are. When XLogInsert stores the whole
- * buffer, the offset arrays need not be stored too.
- */
- if (prstate.nredirected > 0)
- XLogRegisterBufData(0, (char *) prstate.redirected,
- prstate.nredirected *
- sizeof(OffsetNumber) * 2);
-
- if (prstate.ndead > 0)
- XLogRegisterBufData(0, (char *) prstate.nowdead,
- prstate.ndead * sizeof(OffsetNumber));
-
- if (prstate.nunused > 0)
- XLogRegisterBufData(0, (char *) prstate.nowunused,
- prstate.nunused * sizeof(OffsetNumber));
-
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_PRUNE);
-
- PageSetLSN(BufferGetPage(buffer), recptr);
+ log_heap_prune_and_freeze(relation, buffer,
+ prstate.snapshotConflictHorizon,
+ true, reason,
+ NULL, 0,
+ prstate.redirected, prstate.nredirected,
+ prstate.nowdead, prstate.ndead,
+ prstate.nowunused, prstate.nunused);
}
}
else
@@ -827,11 +804,16 @@ heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
/*
* Perform the actual page changes needed by heap_page_prune.
- * It is expected that the caller has a full cleanup lock on the
- * buffer.
+ *
+ * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
+ * as unused, not redirecting or removing anything else. The
+ * PageRepairFragmentation() call is skipped in that case.
+ *
+ * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
+ * the buffer. If it is set, an ordinary exclusive lock suffices.
*/
void
-heap_page_prune_execute(Buffer buffer,
+heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
OffsetNumber *nowunused, int nunused)
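As a hedged usage sketch (the actual call site is in vacuumlazy.c, outside this diff): vacuum's second pass sets 'lp_truncate_only' and passes only an 'unused' array, since it holds just an exclusive lock on the buffer:

/* Sketch: mark already-LP_DEAD items LP_UNUSED under an exclusive lock. */
heap_page_prune_execute(buffer, true,	/* lp_truncate_only */
						NULL, 0,		/* no redirected items */
						NULL, 0,		/* no dead items */
						unused, nunused);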
@@ -843,6 +825,9 @@ heap_page_prune_execute(Buffer buffer,
/* Shouldn't be called unless there's something to do */
Assert(nredirected > 0 || ndead > 0 || nunused > 0);
+ /* If 'lp_truncate_only', we can only remove already-dead line pointers */
+ Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
+
/* Update all redirected line pointers */
offnum = redirected;
for (int i = 0; i < nredirected; i++)
@@ -941,23 +926,29 @@ heap_page_prune_execute(Buffer buffer,
#ifdef USE_ASSERT_CHECKING
- /*
- * When heap_page_prune() was called, mark_unused_now may have been
- * passed as true, which allows would-be LP_DEAD items to be made
- * LP_UNUSED instead. This is only possible if the relation has no
- * indexes. If there are any dead items, then mark_unused_now was not
- * true and every item being marked LP_UNUSED must refer to a
- * heap-only tuple.
- */
- if (ndead > 0)
+ if (lp_truncate_only)
{
- Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
- htup = (HeapTupleHeader) PageGetItem(page, lp);
- Assert(HeapTupleHeaderIsHeapOnly(htup));
+ /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
+ Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
}
else
{
- Assert(ItemIdIsUsed(lp));
+ /*
+ * When heap_page_prune() was called, mark_unused_now may have
+ * been passed as true, which allows would-be LP_DEAD items to be
+ * made LP_UNUSED instead. This is only possible if the relation
+ * has no indexes. If there are any dead items, then
+ * mark_unused_now was not true and every item being marked
+ * LP_UNUSED must refer to a heap-only tuple.
+ */
+ if (ndead > 0)
+ {
+ Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
+ htup = (HeapTupleHeader) PageGetItem(page, lp);
+ Assert(HeapTupleHeaderIsHeapOnly(htup));
+ }
+ else
+ Assert(ItemIdIsUsed(lp));
}
#endif
@@ -965,17 +956,22 @@ heap_page_prune_execute(Buffer buffer,
ItemIdSetUnused(lp);
}
- /*
- * Finally, repair any fragmentation, and update the page's hint bit about
- * whether it has free pointers.
- */
- PageRepairFragmentation(page);
+ if (lp_truncate_only)
+ PageTruncateLinePointerArray(page);
+ else
+ {
+ /*
+ * Finally, repair any fragmentation, and update the page's hint bit
+ * about whether it has free pointers.
+ */
+ PageRepairFragmentation(page);
- /*
- * Now that the page has been modified, assert that redirect items still
- * point to valid targets.
- */
- page_verify_redirects(page);
+ /*
+ * Now that the page has been modified, assert that redirect items
+ * still point to valid targets.
+ */
+ page_verify_redirects(page);
+ }
}
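The lock-strength distinction follows from what each routine may move. A schematic, not part of the patch:

/*
 * PageRepairFragmentation() compacts tuple data within the page, so any
 * concurrent backend holding only a pin must be excluded: hence the
 * cleanup lock. PageTruncateLinePointerArray() merely shortens the line
 * pointer array past trailing LP_UNUSED entries and never moves tuple
 * data, so an exclusive lock suffices.
 *
 *   before:  [lp1][lp2 UNUSED][lp3 UNUSED] ...free... [tuple1]
 *   after:   [lp1] .........more free space.......... [tuple1]
 */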
@@ -1144,3 +1140,288 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
}
}
}
+
+
+/*
+ * Compare the fields that describe the actions required to freeze a tuple
+ * with the caller's open plan. If everything matches, then the tuple's
+ * freeze plan is equivalent to the caller's plan.
+ */
+static inline bool
+heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
+{
+ if (plan->xmax == frz->xmax &&
+ plan->t_infomask2 == frz->t_infomask2 &&
+ plan->t_infomask == frz->t_infomask &&
+ plan->frzflags == frz->frzflags)
+ return true;
+
+ /* Caller must call heap_log_freeze_new_plan again for frz */
+ return false;
+}
+
+/*
+ * Comparator used to deduplicate XLOG_HEAP2_PRUNE_FREEZE freeze plans
+ */
+static int
+heap_log_freeze_cmp(const void *arg1, const void *arg2)
+{
+ HeapTupleFreeze *frz1 = (HeapTupleFreeze *) arg1;
+ HeapTupleFreeze *frz2 = (HeapTupleFreeze *) arg2;
+
+ if (frz1->xmax < frz2->xmax)
+ return -1;
+ else if (frz1->xmax > frz2->xmax)
+ return 1;
+
+ if (frz1->t_infomask2 < frz2->t_infomask2)
+ return -1;
+ else if (frz1->t_infomask2 > frz2->t_infomask2)
+ return 1;
+
+ if (frz1->t_infomask < frz2->t_infomask)
+ return -1;
+ else if (frz1->t_infomask > frz2->t_infomask)
+ return 1;
+
+ if (frz1->frzflags < frz2->frzflags)
+ return -1;
+ else if (frz1->frzflags > frz2->frzflags)
+ return 1;
+
+ /*
+ * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
+ * (So the tuples will share a single canonical freeze plan.)
+ *
+ * We tiebreak on page offset number to keep each freeze plan's page
+ * offset number array individually sorted. (Unnecessary, but be tidy.)
+ */
+ if (frz1->offset < frz2->offset)
+ return -1;
+ else if (frz1->offset > frz2->offset)
+ return 1;
+
+ Assert(false);
+ return 0;
+}
+
+/*
+ * Start a new plan, initialized from one tuple's freeze actions. During
+ * REDO, at least one tuple's freezing steps will be described by the
+ * caller's plan.
+ */
+static inline void
+heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
+{
+ plan->xmax = frz->xmax;
+ plan->t_infomask2 = frz->t_infomask2;
+ plan->t_infomask = frz->t_infomask;
+ plan->frzflags = frz->frzflags;
+ plan->ntuples = 1; /* for now */
+}
+
+/*
+ * Deduplicate tuple-based freeze plans so that each distinct set of
+ * processing steps is only stored once in XLOG_HEAP2_PRUNE_FREEZE records.
+ * Called during original execution of freezing (for logged relations).
+ *
+ * Return value is number of plans set in *plans_out for caller. Also writes
+ * an array of offset numbers into *offsets_out output argument for caller
+ * (actually there is one array per freeze plan, but that's not of immediate
+ * concern to our caller).
+ */
+static int
+heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
+ xlhp_freeze_plan *plans_out,
+ OffsetNumber *offsets_out)
+{
+ int nplans = 0;
+
+ /* Sort tuple-based freeze plans in the order required to deduplicate */
+ qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
+
+ for (int i = 0; i < ntuples; i++)
+ {
+ HeapTupleFreeze *frz = tuples + i;
+
+ if (i == 0)
+ {
+ /* New canonical freeze plan starting with first tup */
+ heap_log_freeze_new_plan(plans_out, frz);
+ nplans++;
+ }
+ else if (heap_log_freeze_eq(plans_out, frz))
+ {
+ /* tup matches open canonical plan -- include tup in it */
+ Assert(offsets_out[i - 1] < frz->offset);
+ plans_out->ntuples++;
+ }
+ else
+ {
+ /* Tup doesn't match current plan -- done with it now */
+ plans_out++;
+
+ /* New canonical freeze plan starting with this tup */
+ heap_log_freeze_new_plan(plans_out, frz);
+ nplans++;
+ }
+
+ /*
+ * Save page offset number in dedicated buffer in passing.
+ *
+ * REDO routine relies on the record's offset numbers array grouping
+ * offset numbers by freeze plan. The sort order within each grouping
+ * is ascending offset number order, just to keep things tidy.
+ */
+ offsets_out[i] = frz->offset;
+ }
+
+ Assert(nplans > 0 && nplans <= ntuples);
+
+ return nplans;
+}
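To illustrate the deduplication, here is a hypothetical example (not part of the patch, and relying on PostgreSQL-internal types), in which two of three tuples require identical freeze actions:

HeapTupleFreeze tuples[3];
xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
OffsetNumber offsets[MaxHeapTuplesPerPage];
int			nplans;

/* Two tuples share xmax 1000 (and all-zero masks); one differs. */
tuples[0] = (HeapTupleFreeze) {.xmax = 1000, .offset = 2};
tuples[1] = (HeapTupleFreeze) {.xmax = 2000, .offset = 5};
tuples[2] = (HeapTupleFreeze) {.xmax = 1000, .offset = 9};

nplans = heap_log_freeze_plan(tuples, 3, plans, offsets);

/*
 * nplans == 2: plans[0] covers xmax 1000 with ntuples == 2, plans[1]
 * covers xmax 2000 with ntuples == 1. offsets[] is {2, 9, 5}: grouped
 * by plan, in ascending offset order within each group.
 */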
+
+/*
+ * Write an XLOG_HEAP2_PRUNE_FREEZE WAL record
+ *
+ * This is used for several different page maintenance operations:
+ *
+ * - Page pruning, in VACUUM's 1st pass or on access: Some items are
+ * redirected, some marked dead, and some removed altogether.
+ *
+ * - Freezing: Items are marked as 'frozen'.
+ *
+ * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
+ *
+ * They have enough commonalities that we use a single WAL record for them
+ * all.
+ *
+ * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
+ * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
+ * replaying 'unused' items depends on whether they were all previously marked
+ * as dead.
+ *
+ * Note: This function scribbles on the 'frozen' array.
+ *
+ * Note: This is called in a critical section, so be careful what you do here.
+ */
+void
+log_heap_prune_and_freeze(Relation relation, Buffer buffer,
+ TransactionId conflict_xid,
+ bool cleanup_lock,
+ PruneReason reason,
+ HeapTupleFreeze *frozen, int nfrozen,
+ OffsetNumber *redirected, int nredirected,
+ OffsetNumber *dead, int ndead,
+ OffsetNumber *unused, int nunused)
+{
+ xl_heap_prune xlrec;
+ XLogRecPtr recptr;
+ uint8 info;
+
+ /* The following local variables hold data registered in the WAL record: */
+ xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
+ xlhp_freeze_plans freeze_plans;
+ xlhp_prune_items redirect_items;
+ xlhp_prune_items dead_items;
+ xlhp_prune_items unused_items;
+ OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
+
+ xlrec.flags = 0;
+
+ /*
+ * Prepare data for the buffer. The arrays are not actually in the
+ * buffer, but we pretend that they are. When XLogInsert stores a full
+ * page image, the arrays can be omitted.
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ if (nfrozen > 0)
+ {
+ int nplans;
+
+ xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
+
+ /*
+ * Prepare a deduplicated representation for use in the WAL record. This
+ * destructively sorts the 'frozen' tuples array in place.
+ */
+ nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
+
+ freeze_plans.nplans = nplans;
+ XLogRegisterBufData(0, (char *) &freeze_plans,
+ offsetof(xlhp_freeze_plans, plans));
+ XLogRegisterBufData(0, (char *) plans,
+ sizeof(xlhp_freeze_plan) * nplans);
+ }
+ if (nredirected > 0)
+ {
+ xlrec.flags |= XLHP_HAS_REDIRECTIONS;
+
+ redirect_items.ntargets = nredirected;
+ XLogRegisterBufData(0, (char *) &redirect_items,
+ offsetof(xlhp_prune_items, data));
+ XLogRegisterBufData(0, (char *) redirected,
+ sizeof(OffsetNumber[2]) * nredirected);
+ }
+ if (ndead > 0)
+ {
+ xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
+
+ dead_items.ntargets = ndead;
+ XLogRegisterBufData(0, (char *) &dead_items,
+ offsetof(xlhp_prune_items, data));
+ XLogRegisterBufData(0, (char *) dead,
+ sizeof(OffsetNumber) * ndead);
+ }
+ if (nunused > 0)
+ {
+ xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
+
+ unused_items.ntargets = nunused;
+ XLogRegisterBufData(0, (char *) &unused_items,
+ offsetof(xlhp_prune_items, data));
+ XLogRegisterBufData(0, (char *) unused,
+ sizeof(OffsetNumber) * nunused);
+ }
+ if (nfrozen > 0)
+ XLogRegisterBufData(0, (char *) frz_offsets,
+ sizeof(OffsetNumber) * nfrozen);
+
+ /*
+ * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
+ * flags above.
+ */
+ if (RelationIsAccessibleInLogicalDecoding(relation))
+ xlrec.flags |= XLHP_IS_CATALOG_REL;
+ if (TransactionIdIsValid(conflict_xid))
+ xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
+ if (cleanup_lock)
+ xlrec.flags |= XLHP_CLEANUP_LOCK;
+ else
+ {
+ Assert(nredirected == 0 && ndead == 0);
+ /* also, any items in 'unused' must've been LP_DEAD previously */
+ }
+ XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
+ if (TransactionIdIsValid(conflict_xid))
+ XLogRegisterData((char *) &conflict_xid, sizeof(TransactionId));
+
+ switch (reason)
+ {
+ case PRUNE_ON_ACCESS:
+ info = XLOG_HEAP2_PRUNE_ON_ACCESS;
+ break;
+ case PRUNE_VACUUM_SCAN:
+ info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
+ break;
+ case PRUNE_VACUUM_CLEANUP:
+ info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
+ break;
+ default:
+ elog(ERROR, "unrecognized prune reason: %d", (int) reason);
+ break;
+ }
+ recptr = XLogInsert(RM_HEAP2_ID, info);
+
+ PageSetLSN(BufferGetPage(buffer), recptr);
+}
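As a hedged usage sketch (the corresponding caller is in vacuumlazy.c, outside this diff): vacuum's second pass logs only 'unused' items, with no conflict horizon and no cleanup lock required on replay:

/* Sketch: WAL-log vacuum's second pass over a heap page. */
log_heap_prune_and_freeze(relation, buffer,
						  InvalidTransactionId, /* no conflict horizon */
						  false,		/* no cleanup lock on replay */
						  PRUNE_VACUUM_CLEANUP,
						  NULL, 0,		/* nothing frozen */
						  NULL, 0,		/* nothing redirected */
						  NULL, 0,		/* nothing marked dead */
						  unused, nunused);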