summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/nbtree/nbtinsert.c49
1 files changed, 29 insertions, 20 deletions
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index be60781fc98..85d97a970ac 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -1473,6 +1473,8 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
Page origpage;
Page leftpage,
rightpage;
+ PGAlignedBlock leftpage_buf,
+ rightpage_buf;
BlockNumber origpagenumber,
rightpagenumber;
BTPageOpaque ropaque,
@@ -1543,8 +1545,8 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
firstrightoff = _bt_findsplitloc(rel, origpage, newitemoff, newitemsz,
newitem, &newitemonleft);
- /* Allocate temp buffer for leftpage */
- leftpage = PageGetTempPage(origpage);
+ /* Use temporary buffer for leftpage */
+ leftpage = leftpage_buf.data;
_bt_pageinit(leftpage, BufferGetPageSize(buf));
lopaque = BTPageGetOpaque(leftpage);
@@ -1707,19 +1709,23 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
/*
* Acquire a new right page to split into, now that left page has a new
- * high key. From here on, it's not okay to throw an error without
- * zeroing rightpage first. This coding rule ensures that we won't
- * confuse future VACUUM operations, which might otherwise try to re-find
- * a downlink to a leftover junk page as the page undergoes deletion.
+ * high key.
*
- * It would be reasonable to start the critical section just after the new
- * rightpage buffer is acquired instead; that would allow us to avoid
- * leftover junk pages without bothering to zero rightpage. We do it this
- * way because it avoids an unnecessary PANIC when either origpage or its
- * existing sibling page are corrupt.
+ * To not confuse future VACUUM operations, we zero the right page and
+ * work on an in-memory copy of it before writing WAL, then copy its
+ * contents back to the actual page once we start the critical section
+ * work. This simplifies the split work, so as there is no need to zero
+ * the right page before throwing an error.
*/
rbuf = _bt_allocbuf(rel, heaprel);
- rightpage = BufferGetPage(rbuf);
+ rightpage = rightpage_buf.data;
+
+ /*
+ * Copy the contents of the right page into its temporary location, and
+ * zero the original space.
+ */
+ memcpy(rightpage, BufferGetPage(rbuf), BLCKSZ);
+ memset(BufferGetPage(rbuf), 0, BLCKSZ);
rightpagenumber = BufferGetBlockNumber(rbuf);
/* rightpage was initialized by _bt_allocbuf */
ropaque = BTPageGetOpaque(rightpage);
@@ -1768,7 +1774,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (PageAddItem(rightpage, (Item) righthighkey, itemsz, afterrightoff,
false, false) == InvalidOffsetNumber)
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add high key to the right sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
@@ -1816,7 +1821,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (!_bt_pgaddtup(leftpage, newitemsz, newitem, afterleftoff,
false))
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to the left sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
@@ -1829,7 +1833,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, newitemsz, newitem, afterrightoff,
afterrightoff == minusinfoff))
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to the right sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
@@ -1843,7 +1846,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
{
if (!_bt_pgaddtup(leftpage, itemsz, dataitem, afterleftoff, false))
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add old item to the left sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
@@ -1855,7 +1857,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, itemsz, dataitem, afterrightoff,
afterrightoff == minusinfoff))
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add old item to the right sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
@@ -1876,7 +1877,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, newitemsz, newitem, afterrightoff,
afterrightoff == minusinfoff))
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to the right sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
@@ -1896,7 +1896,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
sopaque = BTPageGetOpaque(spage);
if (sopaque->btpo_prev != origpagenumber)
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg_internal("right sibling's left-link doesn't match: "
@@ -1939,9 +1938,19 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
* original. We need to do this before writing the WAL record, so that
* XLogInsert can WAL log an image of the page if necessary.
*/
- PageRestoreTempPage(leftpage, origpage);
+ memcpy(origpage, leftpage, BLCKSZ);
/* leftpage, lopaque must not be used below here */
+ /*
+ * Move the contents of the right page from its temporary location to the
+ * destination buffer, before writing the WAL record. Unlike the left
+ * page, the right page and its opaque area are still needed to complete
+ * the update of the page, so reinitialize them.
+ */
+ rightpage = BufferGetPage(rbuf);
+ memcpy(rightpage, rightpage_buf.data, BLCKSZ);
+ ropaque = BTPageGetOpaque(rightpage);
+
MarkBufferDirty(buf);
MarkBufferDirty(rbuf);