Diffstat (limited to 'src/backend/access/nbtree/nbtinsert.c')
-rw-r--r--  src/backend/access/nbtree/nbtinsert.c  142
1 file changed, 95 insertions(+), 47 deletions(-)
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index b6ddf0ec4ca..dde43b1415a 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -58,7 +58,10 @@ static void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf,
static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
static inline bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup,
OffsetNumber itup_off, bool newfirstdataitem);
-static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
+static void _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel,
+ BTInsertState insertstate,
+ bool lpdeadonly, bool checkingunique,
+ bool uniquedup);
/*
* _bt_doinsert() -- Handle insertion of a single index tuple in the tree.
@@ -871,38 +874,14 @@ _bt_findinsertloc(Relation rel,
}
/*
- * If the target page is full, see if we can obtain enough space by
- * erasing LP_DEAD items. If that fails to free enough space, see if
- * we can avoid a page split by performing a deduplication pass over
- * the page.
- *
- * We only perform a deduplication pass for a checkingunique caller
- * when the incoming item is a duplicate of an existing item on the
- * leaf page. This heuristic avoids wasting cycles -- we only expect
- * to benefit from deduplicating a unique index page when most or all
- * recently added items are duplicates. See nbtree/README.
+ * If the target page is full, see if we can obtain enough space using
+ * one or more strategies (e.g. erasing LP_DEAD items, deduplication).
+ * Page splits are expensive, and should only go ahead when truly
+ * necessary.
*/
if (PageGetFreeSpace(page) < insertstate->itemsz)
- {
- if (P_HAS_GARBAGE(opaque))
- {
- _bt_vacuum_one_page(rel, insertstate->buf, heapRel);
- insertstate->bounds_valid = false;
-
- /* Might as well assume duplicates (if checkingunique) */
- uniquedup = true;
- }
-
- if (itup_key->allequalimage && BTGetDeduplicateItems(rel) &&
- (!checkingunique || uniquedup) &&
- PageGetFreeSpace(page) < insertstate->itemsz)
- {
- _bt_dedup_one_page(rel, insertstate->buf, heapRel,
- insertstate->itup, insertstate->itemsz,
- checkingunique);
- insertstate->bounds_valid = false;
- }
- }
+ _bt_delete_or_dedup_one_page(rel, heapRel, insertstate, false,
+ checkingunique, uniquedup);
}
else
{
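[Reader's note, not part of the patch: the conditions under which the new
helper attempts deduplication, condensed from the function added further
down. The names are taken from this patch; the table is just a restatement.]

/*
 * Sketch (not committed code): when does _bt_delete_or_dedup_one_page()
 * attempt a deduplication pass after LP_DEAD deletion fails to free
 * enough space?
 *
 *   lpdeadonly  checkingunique  uniquedup   dedup attempted?
 *   ----------  --------------  ---------   ------------------------
 *   true        (any)           (any)       never
 *   false       false           (any)       yes, if safe and enabled
 *   false       true            false       no
 *   false       true            true        yes, if safe and enabled
 *
 * "Safe and enabled" means BTGetDeduplicateItems(rel) &&
 * itup_key->allequalimage. A successful LP_DEAD deletion that still
 * leaves the page too full forces uniquedup to true.
 */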
@@ -942,8 +921,9 @@ _bt_findinsertloc(Relation rel,
*/
if (P_HAS_GARBAGE(opaque))
{
- _bt_vacuum_one_page(rel, insertstate->buf, heapRel);
- insertstate->bounds_valid = false;
+ /* Erase LP_DEAD items (won't deduplicate) */
+ _bt_delete_or_dedup_one_page(rel, heapRel, insertstate, true,
+ checkingunique, false);
if (PageGetFreeSpace(page) >= insertstate->itemsz)
break; /* OK, now we have enough space */
@@ -993,14 +973,17 @@ _bt_findinsertloc(Relation rel,
* performing a posting list split, so delete all LP_DEAD items early.
* This is the only case where LP_DEAD deletes happen even though
* there is space for newitem on the page.
+ *
+ * This can only erase LP_DEAD items (it won't deduplicate).
*/
- _bt_vacuum_one_page(rel, insertstate->buf, heapRel);
+ _bt_delete_or_dedup_one_page(rel, heapRel, insertstate, true,
+ checkingunique, false);
/*
* Do new binary search. New insert location cannot overlap with any
* posting list now.
*/
- insertstate->bounds_valid = false;
+ Assert(!insertstate->bounds_valid);
insertstate->postingoff = 0;
newitemoff = _bt_binsrch_insert(rel, insertstate);
Assert(insertstate->postingoff == 0);
@@ -2623,32 +2606,59 @@ _bt_pgaddtup(Page page,
}
/*
- * _bt_vacuum_one_page - vacuum just one index page.
+ * _bt_delete_or_dedup_one_page - Try to avoid a leaf page split by attempting
+ * a variety of operations.
+ *
+ * There are two operations performed here: deleting items already marked
+ * LP_DEAD, and deduplication. If both operations fail to free enough space
+ * for the incoming item then the caller will go on to split the page. We
+ * always attempt our preferred strategy (deleting items whose LP_DEAD bits
+ * are set) first. If that doesn't work out, we move on to deduplication.
+ *
+ * Caller's checkingunique and uniquedup arguments help us decide if we should
+ * perform deduplication, which is primarily useful with low cardinality data,
+ * but can sometimes absorb version churn.
+ *
+ * Callers that only want us to look for/delete LP_DEAD items can ask for that
+ * directly by passing a true 'lpdeadonly' argument.
*
- * Try to remove LP_DEAD items from the given page. The passed buffer
- * must be exclusive-locked, but unlike a real VACUUM, we don't need a
- * super-exclusive "cleanup" lock (see nbtree/README).
+ * Note: We used to only delete LP_DEAD items when the BTP_HAS_GARBAGE page
+ * level flag was found set. The flag was useful back when there wasn't
+ * necessarily one single page for a duplicate tuple to go on (before heap TID
+ * became a part of the key space in version 4 indexes). But we don't
+ * actually look at the flag anymore (it's not a gating condition for our
+ * caller): gating on it would cause us to miss tuples that are safe to
+ * delete, without getting any benefit in return. We know the alternative
+ * is to split the page; scanning the line pointer array in passing won't
+ * have noticeable overhead. (We still maintain the BTP_HAS_GARBAGE flag despite
+ * all this because !heapkeyspace indexes must still do a "getting tired"
+ * linear search, and so are likely to get some benefit from using it as a
+ * gating condition.)
*/
static void
-_bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel)
+_bt_delete_or_dedup_one_page(Relation rel, Relation heapRel,
+ BTInsertState insertstate,
+ bool lpdeadonly, bool checkingunique,
+ bool uniquedup)
{
OffsetNumber deletable[MaxIndexTuplesPerPage];
int ndeletable = 0;
OffsetNumber offnum,
- minoff,
maxoff;
+ Buffer buffer = insertstate->buf;
+ BTScanInsert itup_key = insertstate->itup_key;
Page page = BufferGetPage(buffer);
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
Assert(P_ISLEAF(opaque));
+ Assert(lpdeadonly || itup_key->heapkeyspace);
/*
* Scan over all items to see which ones need to be deleted according to
* LP_DEAD flags.
*/
- minoff = P_FIRSTDATAKEY(opaque);
maxoff = PageGetMaxOffsetNumber(page);
- for (offnum = minoff;
+ for (offnum = P_FIRSTDATAKEY(opaque);
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
{
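[For context: the loop body elided between these hunks is unchanged by the
patch. It simply collects the offsets of dead line pointers, essentially:]

		ItemId		itemId = PageGetItemId(page, offnum);

		if (ItemIdIsDead(itemId))
			deletable[ndeletable++] = offnum;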
@@ -2659,12 +2669,50 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel)
}
if (ndeletable > 0)
+ {
_bt_delitems_delete(rel, buffer, deletable, ndeletable, heapRel);
+ insertstate->bounds_valid = false;
+
+ /* Return when a page split has already been avoided */
+ if (PageGetFreeSpace(page) >= insertstate->itemsz)
+ return;
+
+ /* Might as well assume duplicates (if checkingunique) */
+ uniquedup = true;
+ }
+
+ /*
+ * Some callers only want to delete LP_DEAD items. Return early for these
+ * callers.
+ *
+ * Note: The page's BTP_HAS_GARBAGE hint flag may still be set when we
+ * return at this point (or when we go on to try deduplication and it also
+ * fails). We do not bother expending a
+ * separate write to clear it, however. Caller will definitely clear it
+ * when it goes on to split the page (plus deduplication knows to clear
+ * the flag when it actually modifies the page).
+ */
+ if (lpdeadonly)
+ return;
+
+ /*
+ * We can get called in the checkingunique case even when there is no
+ * reason to believe that there are any duplicates on the page; checking
+ * for LP_DEAD items was still worthwhile. Since that didn't free enough
+ * space, give up and let caller split the page. Deduplication cannot be
+ * justified when there is no reason to think that there are duplicates.
+ */
+ if (checkingunique && !uniquedup)
+ return;
+
+ /* Assume bounds about to be invalidated (this is almost certain now) */
+ insertstate->bounds_valid = false;
/*
- * Note: if we didn't find any LP_DEAD items, then the page's
- * BTP_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
- * separate write to clear it, however. We will clear it when we split
- * the page, or when deduplication runs.
+ * Perform a deduplication pass, though only when it is enabled for the
+ * index and known to be safe (it must be an allequalimage index).
*/
+ if (BTGetDeduplicateItems(rel) && itup_key->allequalimage)
+ _bt_dedup_pass(rel, buffer, heapRel, insertstate->itup,
+ insertstate->itemsz, checkingunique);
}
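
[To summarize the patch as a whole, a condensed sketch of the new helper's
control flow. Illustration only: the hypothetical delete_lp_dead_items()
stands in for the scan-and-_bt_delitems_delete() steps shown in the hunks
above, and the committed function is authoritative.]

static void
sketch_delete_or_dedup_one_page(Relation rel, Relation heapRel,
								BTInsertState insertstate,
								bool lpdeadonly, bool checkingunique,
								bool uniquedup)
{
	Page		page = BufferGetPage(insertstate->buf);

	/* Preferred strategy: delete LP_DEAD items first */
	if (delete_lp_dead_items(rel, insertstate))	/* hypothetical helper */
	{
		insertstate->bounds_valid = false;
		if (PageGetFreeSpace(page) >= insertstate->itemsz)
			return;				/* page split avoided */
		uniquedup = true;		/* might as well assume duplicates */
	}

	/* Some call sites only want LP_DEAD deletion */
	if (lpdeadonly)
		return;

	/* Deduplicating a unique index page only pays off with duplicates */
	if (checkingunique && !uniquedup)
		return;

	/* Fallback strategy: deduplication pass, when enabled and safe */
	insertstate->bounds_valid = false;
	if (BTGetDeduplicateItems(rel) && insertstate->itup_key->allequalimage)
		_bt_dedup_pass(rel, insertstate->buf, heapRel, insertstate->itup,
					   insertstate->itemsz, checkingunique);
}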