author     Tom Lane <tgl@sss.pgh.pa.us>    2007-09-20 17:56:33 +0000
committer  Tom Lane <tgl@sss.pgh.pa.us>    2007-09-20 17:56:33 +0000
commit     282d2a03dd30804b01f8042f640d638c2ee76604 (patch)
tree       004f08ce31f1bfb03ab55571ad7867babe5b3d7f /src/backend/commands/vacuum.c
parent     bbf4fdc2538097bb3103806e1419ceef1f289203 (diff)
HOT updates.  When we update a tuple without changing any of its indexed
columns, and the new version can be stored on the same heap page, we no
longer generate extra index entries for the new version.  Instead, index
searches follow the HOT-chain links to ensure they find the correct tuple
version.

In addition, this patch introduces the ability to "prune" dead tuples on a
per-page basis, without having to do a complete VACUUM pass to recover space.
VACUUM is still needed to clean up dead index entries, however.

Pavan Deolasee, with help from a bunch of other people.
Diffstat (limited to 'src/backend/commands/vacuum.c')
-rw-r--r--  src/backend/commands/vacuum.c  245
1 file changed, 210 insertions(+), 35 deletions(-)
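
Before the diff itself, a hedged sketch (not part of the commit) of the per-page flow the patch gives scan_heap(): take a cleanup lock, prune HOT chains, then collect the remaining vacuumable items. Every function and macro name is taken from the hunks below; the helper name scan_one_page_sketch and its signature are invented for illustration, and the fragment assumes the usual vacuum.c context (types such as VacPage, Assert, etc.) rather than being buildable on its own.

/*
 * Illustrative sketch only -- condensed from the hunks below.
 */
static void
scan_one_page_sketch(Relation onerel, Buffer buf, VacPage vacpage,
                     TransactionId OldestXmin, double *tups_vacuumed)
{
    Page         page;
    OffsetNumber offnum,
                 maxoff;

    /* Cleanup lock, not just exclusive, so heap_page_prune() is safe */
    LockBufferForCleanup(buf);
    page = BufferGetPage(buf);

    /*
     * Prune all HOT-update chains first.  The first flag is the
     * redirect_move option described in the diff, so redirecting line
     * pointers are collapsed out and never seen below; the second flag
     * is passed as false, exactly as in the patched scan_heap().
     * Tuples removed here are counted as removed by VACUUM.
     */
    *tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin,
                                      true, false);

    maxoff = PageGetMaxOffsetNumber(page);
    for (offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId itemid = PageGetItemId(page, offnum);

        /* (the real code also records unused slots; skipped here) */
        if (!ItemIdIsUsed(itemid))
            continue;

        if (ItemIdIsDead(itemid))
        {
            /*
             * Pruning left only a DEAD line pointer; remember it for
             * vacuum_page(), but don't add it to tups_vacuumed or the
             * tuple pruning already removed would be double-counted.
             */
            vacpage->offsets[vacpage->offsets_free++] = offnum;
            continue;
        }

        /* With redirect_move, no redirected items should remain */
        Assert(ItemIdIsNormal(itemid));

        /* ... HeapTupleSatisfiesVacuum() checks, freezing, etc. ... */
    }
}

The diff below shows the real version of this logic, including the HOT-updated DEAD-tuple corner case that forces VACUUM FULL to abandon shrinking, and the parallel rel_indexed_tuples bookkeeping used when cleaning indexes.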
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index f9b9423534e..5630fc2730d 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.358 2007/09/12 22:10:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.359 2007/09/20 17:56:31 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -124,10 +124,11 @@ typedef VTupleMoveData *VTupleMove;
typedef struct VRelStats
{
/* miscellaneous statistics */
- BlockNumber rel_pages;
- double rel_tuples;
- Size min_tlen;
- Size max_tlen;
+ BlockNumber rel_pages; /* pages in relation */
+ double rel_tuples; /* tuples that remain after vacuuming */
+ double rel_indexed_tuples; /* indexed tuples that remain */
+ Size min_tlen; /* min surviving tuple size */
+ Size max_tlen; /* max surviving tuple size */
bool hasindex;
/* vtlinks array for tuple chain following - sorted by new_tid */
int num_vtlinks;
@@ -1177,6 +1178,7 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
vacrelstats->rel_pages = 0;
vacrelstats->rel_tuples = 0;
+ vacrelstats->rel_indexed_tuples = 0;
vacrelstats->hasindex = false;
/* scan the heap */
@@ -1195,13 +1197,13 @@ full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
{
for (i = 0; i < nindexes; i++)
vacuum_index(&vacuum_pages, Irel[i],
- vacrelstats->rel_tuples, 0);
+ vacrelstats->rel_indexed_tuples, 0);
}
else
{
/* just scan indexes to update statistic */
for (i = 0; i < nindexes; i++)
- scan_index(Irel[i], vacrelstats->rel_tuples);
+ scan_index(Irel[i], vacrelstats->rel_indexed_tuples);
}
}
@@ -1256,6 +1258,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
BlockNumber empty_pages,
empty_end_pages;
double num_tuples,
+ num_indexed_tuples,
tups_vacuumed,
nkeep,
nunused;
@@ -1278,7 +1281,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
relname)));
empty_pages = empty_end_pages = 0;
- num_tuples = tups_vacuumed = nkeep = nunused = 0;
+ num_tuples = num_indexed_tuples = tups_vacuumed = nkeep = nunused = 0;
free_space = 0;
nblocks = RelationGetNumberOfBlocks(onerel);
@@ -1313,9 +1316,13 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
* background writer will try to write the page if it's already marked
* dirty. To ensure that invalid data doesn't get written to disk, we
* must take exclusive buffer lock wherever we potentially modify
- * pages.
+ * pages. In fact, we insist on cleanup lock so that we can safely
+ * call heap_page_prune(). (This might be overkill, since the bgwriter
+ * pays no attention to individual tuples, but on the other hand it's
+ * unlikely that the bgwriter has this particular page pinned at this
+ * instant. So violating the coding rule would buy us little anyway.)
*/
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ LockBufferForCleanup(buf);
vacpage->blkno = blkno;
vacpage->offsets_used = 0;
@@ -1356,6 +1363,21 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
continue;
}
+ /*
+ * Prune all HOT-update chains in this page.
+ *
+ * We use the redirect_move option so that redirecting line pointers
+ * get collapsed out; this allows us to not worry about them below.
+ *
+ * We count tuples removed by the pruning step as removed by VACUUM.
+ */
+ tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin,
+ true, false);
+
+ /*
+ * Now scan the page to collect vacuumable items and check for
+ * tuples requiring freezing.
+ */
nfrozen = 0;
notup = true;
maxoff = PageGetMaxOffsetNumber(page);
@@ -1369,7 +1391,9 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
/*
* Collect un-used items too - it's possible to have indexes
- * pointing here after crash.
+ * pointing here after crash. (That's an ancient comment and
+ * is likely obsolete with WAL, but we might as well continue
+ * to check for such problems.)
*/
if (!ItemIdIsUsed(itemid))
{
@@ -1378,6 +1402,23 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
continue;
}
+ /*
+ * DEAD item pointers are to be vacuumed normally; but we don't
+ * count them in tups_vacuumed, else we'd be double-counting
+ * (at least in the common case where heap_page_prune() just
+ * freed up a non-HOT tuple).
+ */
+ if (ItemIdIsDead(itemid))
+ {
+ vacpage->offsets[vacpage->offsets_free++] = offnum;
+ continue;
+ }
+
+ /* Shouldn't have any redirected items anymore */
+ if (!ItemIdIsNormal(itemid))
+ elog(ERROR, "relation \"%s\" TID %u/%u: unexpected redirect item",
+ relname, blkno, offnum);
+
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
ItemPointerSet(&(tuple.t_self), blkno, offnum);
@@ -1410,12 +1451,45 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
}
break;
case HEAPTUPLE_DEAD:
- tupgone = true; /* we can delete the tuple */
/*
- * We need not require XMIN_COMMITTED or XMAX_COMMITTED to
- * be set, since we will remove the tuple without any
- * further examination of its hint bits.
+ * Ordinarily, DEAD tuples would have been removed by
+ * heap_page_prune(), but it's possible that the tuple
+ * state changed since heap_page_prune() looked. In
+ * particular an INSERT_IN_PROGRESS tuple could have
+ * changed to DEAD if the inserter aborted. So this
+ * cannot be considered an error condition, though it
+ * does suggest that someone released a lock early.
+ *
+ * If the tuple is HOT-updated then it must only be
+ * removed by a prune operation; so we keep it as if it
+ * were RECENTLY_DEAD, and abandon shrinking. (XXX is it
+ * worth trying to make the shrinking code smart enough
+ * to handle this? It's an unusual corner case.)
+ *
+ * DEAD heap-only tuples can safely be removed if they
+ * aren't themselves HOT-updated, although this is a bit
+ * inefficient since we'll uselessly try to remove
+ * index entries for them.
*/
+ if (HeapTupleIsHotUpdated(&tuple))
+ {
+ nkeep += 1;
+ if (do_shrinking)
+ ereport(LOG,
+ (errmsg("relation \"%s\" TID %u/%u: dead HOT-updated tuple --- cannot shrink relation",
+ relname, blkno, offnum)));
+ do_shrinking = false;
+ }
+ else
+ {
+ tupgone = true; /* we can delete the tuple */
+ /*
+ * We need not require XMIN_COMMITTED or
+ * XMAX_COMMITTED to be set, since we will remove the
+ * tuple without any further examination of its hint
+ * bits.
+ */
+ }
break;
case HEAPTUPLE_RECENTLY_DEAD:
@@ -1530,6 +1604,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
else
{
num_tuples += 1;
+ if (!HeapTupleIsHeapOnly(&tuple))
+ num_indexed_tuples += 1;
notup = false;
if (tuple.t_len < min_tlen)
min_tlen = tuple.t_len;
@@ -1549,7 +1625,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
if (tempPage != NULL)
{
/* Some tuples are removable; figure free space after removal */
- PageRepairFragmentation(tempPage, NULL);
+ PageRepairFragmentation(tempPage);
vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, tempPage);
pfree(tempPage);
do_reap = true;
@@ -1558,7 +1634,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
{
/* Just use current available space */
vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page);
- /* Need to reap the page if it has LP_UNUSED line pointers */
+ /* Need to reap the page if it has UNUSED or DEAD line pointers */
do_reap = (vacpage->offsets_free > 0);
}
@@ -1621,6 +1697,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
/* save stats in the rel list for use later */
vacrelstats->rel_tuples = num_tuples;
+ vacrelstats->rel_indexed_tuples = num_indexed_tuples;
vacrelstats->rel_pages = nblocks;
if (num_tuples == 0)
min_tlen = max_tlen = 0;
@@ -1720,6 +1797,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
num_fraged_pages,
vacuumed_pages;
int keep_tuples = 0;
+ int keep_indexed_tuples = 0;
PGRUsage ru0;
pg_rusage_init(&ru0);
@@ -1845,6 +1923,16 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (!ItemIdIsUsed(itemid))
continue;
+ if (ItemIdIsDead(itemid))
+ {
+ /* just remember it for vacuum_page() */
+ vacpage->offsets[vacpage->offsets_free++] = offnum;
+ continue;
+ }
+
+ /* Shouldn't have any redirected items now */
+ Assert(ItemIdIsNormal(itemid));
+
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple_len = tuple.t_len = ItemIdGetLength(itemid);
ItemPointerSet(&(tuple.t_self), blkno, offnum);
@@ -1906,12 +1994,28 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (i >= vacpage->offsets_free) /* not found */
{
vacpage->offsets[vacpage->offsets_free++] = offnum;
+ /*
+ * If this is not a heap-only tuple, there must be an
+ * index entry for this item which will be removed in
+ * the index cleanup. Decrement the keep_indexed_tuples
+ * count to remember this.
+ */
+ if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
+ keep_indexed_tuples--;
keep_tuples--;
}
}
else
{
vacpage->offsets[vacpage->offsets_free++] = offnum;
+ /*
+ * If this is not a heap-only tuple, there must be an
+ * index entry for this item which will be removed in
+ * the index cleanup. Decrement the keep_indexed_tuples
+ * count to remember this.
+ */
+ if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
+ keep_indexed_tuples--;
keep_tuples--;
}
continue;
@@ -2028,7 +2132,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
break;
}
nextItemid = PageGetItemId(nextPage, nextOffnum);
- if (!ItemIdIsUsed(nextItemid))
+ if (!ItemIdIsNormal(nextItemid))
{
ReleaseBuffer(nextBuf);
break;
@@ -2166,7 +2270,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
Pitemid = PageGetItemId(Ppage,
ItemPointerGetOffsetNumber(&(tp.t_self)));
/* this can't happen since we saw tuple earlier: */
- if (!ItemIdIsUsed(Pitemid))
+ if (!ItemIdIsNormal(Pitemid))
elog(ERROR, "parent itemid marked as unused");
PTdata = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
@@ -2268,6 +2372,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
dst_buffer, dst_page, destvacpage,
&ec, &Ctid, vtmove[ti].cleanVpd);
+ /*
+ * If the tuple we are moving is a heap-only tuple,
+ * this move will generate an additional index entry,
+ * so increment the rel_indexed_tuples count.
+ */
+ if (HeapTupleHeaderIsHeapOnly(tuple.t_data))
+ vacrelstats->rel_indexed_tuples++;
+
num_moved++;
if (destvacpage->blkno > last_move_dest_block)
last_move_dest_block = destvacpage->blkno;
@@ -2280,7 +2392,31 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
vacpage->offsets[vacpage->offsets_free++] =
ItemPointerGetOffsetNumber(&(tuple.t_self));
else
+ {
+ /*
+ * When we move tuple chains, we may need to move
+ * tuples from a block that we haven't yet scanned in
+ * the outer walk-along-the-relation loop. Note that we
+ * can't be moving a tuple from a block that we have
+ * already scanned because if such a tuple exists, then
+ * we must have moved the chain along with that tuple
+ * when we scanned that block. IOW the test of
+ * (Cbuf != buf) guarantees that the tuple we are
+ * looking at right now is in a block which is yet to
+ * be scanned.
+ *
+ * We maintain two counters to correctly count the
+ * moved-off tuples from blocks that are not yet
+ * scanned (keep_tuples) and how many of them have
+ * index pointers (keep_indexed_tuples). The main
+ * reason to track the latter is to help verify
+ * that indexes have the expected number of entries
+ * when all the dust settles.
+ */
+ if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
+ keep_indexed_tuples++;
keep_tuples++;
+ }
ReleaseBuffer(dst_buffer);
ReleaseBuffer(Cbuf);
@@ -2328,6 +2464,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
move_plain_tuple(onerel, buf, page, &tuple,
dst_buffer, dst_page, dst_vacpage, &ec);
+ /*
+ * If the tuple we are moving is a heap-only tuple,
+ * this move will generate an additional index entry,
+ * so increment the rel_indexed_tuples count.
+ */
+ if (HeapTupleHeaderIsHeapOnly(tuple.t_data))
+ vacrelstats->rel_indexed_tuples++;
+
num_moved++;
if (dst_vacpage->blkno > last_move_dest_block)
last_move_dest_block = dst_vacpage->blkno;
@@ -2361,6 +2505,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (!ItemIdIsUsed(itemid))
continue;
+ /* Shouldn't be any DEAD or REDIRECT items anymore */
+ Assert(ItemIdIsNormal(itemid));
+
htup = (HeapTupleHeader) PageGetItem(page, itemid);
if (htup->t_infomask & HEAP_XMIN_COMMITTED)
continue;
@@ -2389,6 +2536,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
{
vacpage->offsets[vacpage->offsets_free++] = off;
Assert(keep_tuples > 0);
+ /*
+ * If this is not a heap-only tuple, there must be an
+ * index entry for this item which will be removed in
+ * the index cleanup. Decrement the keep_indexed_tuples
+ * count to remember this.
+ */
+ if (!HeapTupleHeaderIsHeapOnly(htup))
+ keep_indexed_tuples--;
keep_tuples--;
}
}
@@ -2396,6 +2551,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
{
vacpage->offsets[vacpage->offsets_free++] = off;
Assert(keep_tuples > 0);
+ if (!HeapTupleHeaderIsHeapOnly(htup))
+ keep_indexed_tuples--;
keep_tuples--;
}
}
@@ -2529,11 +2686,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
* page during chain moves but not been scanned over subsequently.
* The tuple ids of these tuples are not recorded as free offsets
* for any VacPage, so they will not be cleared from the indexes.
+ * keep_indexed_tuples is the portion of these that are expected
+ * to have index entries.
*/
Assert(keep_tuples >= 0);
for (i = 0; i < nindexes; i++)
vacuum_index(&Nvacpagelist, Irel[i],
- vacrelstats->rel_tuples, keep_tuples);
+ vacrelstats->rel_indexed_tuples,
+ keep_indexed_tuples);
}
/*
@@ -2551,7 +2711,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
OffsetNumber unused[MaxOffsetNumber];
OffsetNumber offnum,
maxoff;
- int uncnt;
+ int uncnt = 0;
int num_tuples = 0;
buf = ReadBufferWithStrategy(onerel, vacpage->blkno, vac_strategy);
@@ -2567,6 +2727,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (!ItemIdIsUsed(itemid))
continue;
+ /* Shouldn't be any DEAD or REDIRECT items anymore */
+ Assert(ItemIdIsNormal(itemid));
+
htup = (HeapTupleHeader) PageGetItem(page, itemid);
if (htup->t_infomask & HEAP_XMIN_COMMITTED)
continue;
@@ -2584,12 +2747,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
ItemIdSetUnused(itemid);
num_tuples++;
+
+ unused[uncnt++] = offnum;
}
Assert(vacpage->offsets_free == num_tuples);
START_CRIT_SECTION();
- uncnt = PageRepairFragmentation(page, unused);
+ PageRepairFragmentation(page);
MarkBufferDirty(buf);
@@ -2598,7 +2763,10 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
{
XLogRecPtr recptr;
- recptr = log_heap_clean(onerel, buf, unused, uncnt);
+ recptr = log_heap_clean(onerel, buf,
+ NULL, 0, NULL, 0,
+ unused, uncnt,
+ false);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
@@ -2706,15 +2874,17 @@ move_chain_tuple(Relation rel,
/*
* Update the state of the copied tuple, and store it on the destination
- * page.
+ * page. The copied tuple is never part of a HOT chain.
*/
newtup.t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
HEAP_XMIN_INVALID |
HEAP_MOVED_OFF);
newtup.t_data->t_infomask |= HEAP_MOVED_IN;
+ HeapTupleHeaderClearHotUpdated(newtup.t_data);
+ HeapTupleHeaderClearHeapOnly(newtup.t_data);
HeapTupleHeaderSetXvac(newtup.t_data, myXID);
newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len,
- InvalidOffsetNumber, false);
+ InvalidOffsetNumber, false, true);
if (newoff == InvalidOffsetNumber)
elog(PANIC, "failed to add item with len = %lu to page %u while moving tuple chain",
(unsigned long) tuple_len, dst_vacpage->blkno);
@@ -2809,17 +2979,19 @@ move_plain_tuple(Relation rel,
START_CRIT_SECTION();
/*
- * Mark new tuple as MOVED_IN by me.
+ * Mark new tuple as MOVED_IN by me; also mark it not HOT.
*/
newtup.t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
HEAP_XMIN_INVALID |
HEAP_MOVED_OFF);
newtup.t_data->t_infomask |= HEAP_MOVED_IN;
+ HeapTupleHeaderClearHotUpdated(newtup.t_data);
+ HeapTupleHeaderClearHeapOnly(newtup.t_data);
HeapTupleHeaderSetXvac(newtup.t_data, myXID);
/* add tuple to the page */
newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len,
- InvalidOffsetNumber, false);
+ InvalidOffsetNumber, false, true);
if (newoff == InvalidOffsetNumber)
elog(PANIC, "failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)",
(unsigned long) tuple_len,
@@ -2934,6 +3106,9 @@ update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
if (!ItemIdIsUsed(itemid))
continue;
+ /* Shouldn't be any DEAD or REDIRECT items anymore */
+ Assert(ItemIdIsNormal(itemid));
+
htup = (HeapTupleHeader) PageGetItem(page, itemid);
if (htup->t_infomask & HEAP_XMIN_COMMITTED)
continue;
@@ -3019,10 +3194,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
static void
vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
{
- OffsetNumber unused[MaxOffsetNumber];
- int uncnt;
Page page = BufferGetPage(buffer);
- ItemId itemid;
int i;
/* There shouldn't be any tuples moved onto the page yet! */
@@ -3032,11 +3204,12 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
for (i = 0; i < vacpage->offsets_free; i++)
{
- itemid = PageGetItemId(page, vacpage->offsets[i]);
+ ItemId itemid = PageGetItemId(page, vacpage->offsets[i]);
+
ItemIdSetUnused(itemid);
}
- uncnt = PageRepairFragmentation(page, unused);
+ PageRepairFragmentation(page);
MarkBufferDirty(buffer);
@@ -3045,7 +3218,10 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
{
XLogRecPtr recptr;
- recptr = log_heap_clean(onerel, buffer, unused, uncnt);
+ recptr = log_heap_clean(onerel, buffer,
+ NULL, 0, NULL, 0,
+ vacpage->offsets, vacpage->offsets_free,
+ false);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
@@ -3527,8 +3703,7 @@ enough_space(VacPage vacpage, Size len)
static Size
PageGetFreeSpaceWithFillFactor(Relation relation, Page page)
{
- PageHeader pd = (PageHeader) page;
- Size freespace = pd->pd_upper - pd->pd_lower;
+ Size freespace = PageGetHeapFreeSpace(page);
Size targetfree;
targetfree = RelationGetTargetPageFreeSpace(relation,