summary | refs | log | tree | commit | diff
path: root/src/backend/access/gin/gindatapage.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2016-04-20 14:25:15 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2016-04-20 14:25:15 -0400
commit ef35afa35c422928d8fb900dd69cfc182f076bf0 (patch)
tree 0a22e67ee8f01c113a22b530f89a1168b10595ba /src/backend/access/gin/gindatapage.c
parent 21b7f49eb88a6d39acf9569cddf65f1985318056 (diff)
Fix memory leak and other bugs in ginPlaceToPage() & subroutines.
Commit 36a35c550ac114ca turned the interface between ginPlaceToPage and its subroutines in gindatapage.c and ginentrypage.c into a royal mess: page-update critical sections were started in one place and finished in another place not even in the same file, and the very same subroutine might return having started a critical section or not. Subsequent patches band-aided over some of the problems with this design by making things even messier. One user-visible resulting problem is memory leaks caused by the need for the subroutines to allocate storage that would survive until ginPlaceToPage calls XLogInsert (as reported by Julien Rouhaud). This would not typically be noticeable during retail index updates. It could be visible in a GIN index build, in the form of memory consumption swelling to several times the commanded maintenance_work_mem. Another rather nasty problem is that in the internal-page-splitting code path, we would clear the child page's GIN_INCOMPLETE_SPLIT flag well before entering the critical section that it's supposed to be cleared in; a failure in between would leave the index in a corrupt state. There were also assorted coding-rule violations with little immediate consequence but possible long-term hazards, such as beginning an XLogInsert sequence before entering a critical section, or calling elog(DEBUG) inside a critical section. To fix, redefine the API between ginPlaceToPage() and its subroutines by splitting the subroutines into two parts. The "beginPlaceToPage" subroutine does what can be done outside a critical section, including full computation of the result pages into temporary storage when we're going to split the target page. The "execPlaceToPage" subroutine is called within a critical section established by ginPlaceToPage(), and it handles the actual page update in the non-split code path. The critical section, as well as the XLOG insertion call sequence, are both now always started and finished in ginPlaceToPage(). 
Also, make ginPlaceToPage() create and work in a short-lived memory context to eliminate the leakage problem. (Since a short-lived memory context had been getting created in the most common code path in the subroutines, this shouldn't cause any noticeable performance penalty; we're just moving the overhead up one call level.) In passing, fix a bunch of comments that had gone unmaintained throughout all this klugery. Report: <571276DD.5050303@dalibo.com>
Diffstat (limited to 'src/backend/access/gin/gindatapage.c')
-rw-r--r-- src/backend/access/gin/gindatapage.c 373
1 file changed, 236 insertions, 137 deletions
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index e3ab6cfd0ee..209020992dc 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -18,7 +18,6 @@
#include "access/heapam_xlog.h"
#include "lib/ilist.h"
#include "miscadmin.h"
-#include "utils/memutils.h"
#include "utils/rel.h"
/*
@@ -57,6 +56,13 @@ typedef struct
int rsize; /* total size on right page */
bool oldformat; /* page is in pre-9.4 format on disk */
+
+ /*
+ * If we need WAL data representing the reconstructed leaf page, it's
+ * stored here by computeLeafRecompressWALData.
+ */
+ char *walinfo; /* buffer start */
+ int walinfolen; /* and length */
} disassembledLeaf;
typedef struct
@@ -98,20 +104,18 @@ static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage);
+ Page *newlpage, Page *newrpage, XLogRecData *rdata);
static disassembledLeaf *disassembleLeaf(Page page);
static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
int nNewItems);
-static XLogRecData *constructLeafRecompressWALData(Buffer buf,
- disassembledLeaf *leaf);
+static void computeLeafRecompressWALData(disassembledLeaf *leaf);
static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
-static void dataPlaceToPageLeafSplit(Buffer buf,
- disassembledLeaf *leaf,
+static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage);
+ Page lpage, Page rpage, XLogRecData *rdata);
/*
* Read TIDs from leaf data page to single uncompressed array. The TIDs are
@@ -424,12 +428,25 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
}
/*
- * Places keys to leaf data page and fills WAL record.
+ * Prepare to insert data on a leaf data page.
+ *
+ * If it will fit, return GPTP_INSERT after doing whatever setup is needed
+ * before we enter the insertion critical section. *ptp_workspace can be
+ * set to pass information along to the execPlaceToPage function.
+ *
+ * If it won't fit, perform a page split and return two temporary page
+ * images into *newlpage and *newrpage, with result GPTP_SPLIT. Also,
+ * if WAL logging is needed, fill one or more entries of rdata[] with
+ * whatever data must be appended to the WAL record.
+ *
+ * In neither case should the given page buffer be modified here.
*/
static GinPlaceToPageRC
-dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata,
+ void **ptp_workspace,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
GinBtreeDataLeafInsertData *items = insertdata;
ItemPointer newItems = &items->items[items->curitem];
@@ -442,15 +459,11 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
bool append;
int segsize;
Size freespace;
- MemoryContext tmpCxt;
- MemoryContext oldCxt;
disassembledLeaf *leaf;
leafSegmentInfo *lastleftinfo;
ItemPointerData maxOldItem;
ItemPointerData remaining;
- Assert(GinPageIsData(page));
-
rbound = *GinDataPageGetRightBound(page);
/*
@@ -474,18 +487,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
maxitems = i;
}
- /*
- * The following operations do quite a lot of small memory allocations,
- * create a temporary memory context so that we don't need to keep track
- * of them individually.
- */
- tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
- "Gin split temporary context",
- ALLOCSET_DEFAULT_MINSIZE,
- ALLOCSET_DEFAULT_INITSIZE,
- ALLOCSET_DEFAULT_MAXSIZE);
- oldCxt = MemoryContextSwitchTo(tmpCxt);
-
+ /* Disassemble the data on the page */
leaf = disassembleLeaf(page);
/*
@@ -550,16 +552,13 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
}
- /* Add the new items to the segments */
+ /* Add the new items to the segment list */
if (!addItemsToLeaf(leaf, newItems, maxitems))
{
/* all items were duplicates, we have nothing to do */
items->curitem += maxitems;
- MemoryContextSwitchTo(oldCxt);
- MemoryContextDelete(tmpCxt);
-
- return UNMODIFIED;
+ return GPTP_NO_WORK;
}
/*
@@ -592,21 +591,17 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
if (!needsplit)
{
/*
- * Great, all the items fit on a single page. Construct a WAL record
- * describing the changes we made, and write the segments back to the
- * page.
- *
- * Once we start modifying the page, there's no turning back. The
- * caller is responsible for calling END_CRIT_SECTION() after writing
- * the WAL record.
+ * Great, all the items fit on a single page. If needed, prepare data
+ * for a WAL record describing the changes we'll make.
*/
- MemoryContextSwitchTo(oldCxt);
if (RelationNeedsWAL(btree->index))
- *prdata = constructLeafRecompressWALData(buf, leaf);
- else
- *prdata = NULL;
- START_CRIT_SECTION();
- dataPlaceToPageLeafRecompress(buf, leaf);
+ computeLeafRecompressWALData(leaf);
+
+ /*
+ * We're ready to enter the critical section, but
+ * dataExecPlaceToPageLeaf will need access to the "leaf" data.
+ */
+ *ptp_workspace = leaf;
if (append)
elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
@@ -620,7 +615,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
else
{
/*
- * Had to split.
+ * Have to split.
*
* leafRepackItems already divided the segments between the left and
* the right page. It filled the left page as full as possible, and
@@ -632,7 +627,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* until they're balanced.
*
* As a further heuristic, when appending items to the end of the
- * page, try make the left page 75% full, one the assumption that
+ * page, try to make the left page 75% full, on the assumption that
* subsequent insertions will probably also go to the end. This packs
* the index somewhat tighter when appending to a table, which is very
* common.
@@ -681,11 +676,14 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
&lastleftinfo->nitems);
lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
- *newlpage = MemoryContextAlloc(oldCxt, BLCKSZ);
- *newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
+ /*
+ * Now allocate a couple of temporary page images, and fill them.
+ */
+ *newlpage = palloc(BLCKSZ);
+ *newrpage = palloc(BLCKSZ);
- dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
- prdata, *newlpage, *newrpage);
+ dataPlaceToPageLeafSplit(leaf, lbound, rbound,
+ *newlpage, *newrpage, rdata);
Assert(GinPageRightMost(page) ||
ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
@@ -701,12 +699,37 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
items->nitem - items->curitem - maxitems);
}
- MemoryContextSwitchTo(oldCxt);
- MemoryContextDelete(tmpCxt);
-
items->curitem += maxitems;
- return needsplit ? SPLIT : INSERTED;
+ return needsplit ? GPTP_SPLIT : GPTP_INSERT;
+}
+
+/*
+ * Perform data insertion after beginPlaceToPage has decided it will fit.
+ *
+ * This is invoked within a critical section. It must modify the target
+ * buffer and store one or more XLogRecData records describing the changes
+ * in rdata[].
+ */
+static void
+dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, void *ptp_workspace,
+ XLogRecData *rdata)
+{
+ disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
+
+ /* Apply changes to page */
+ dataPlaceToPageLeafRecompress(buf, leaf);
+
+ /* If needed, register WAL data built by computeLeafRecompressWALData */
+ if (RelationNeedsWAL(btree->index))
+ {
+ rdata[0].buffer = buf;
+ rdata[0].buffer_std = true;
+ rdata[0].data = leaf->walinfo;
+ rdata[0].len = leaf->walinfolen;
+ rdata[0].next = NULL;
+ }
}
/*
@@ -791,7 +814,6 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
*/
if (removedsomething)
{
- XLogRecData *payloadrdata = NULL;
bool modified;
/*
@@ -818,8 +840,11 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
}
if (RelationNeedsWAL(indexrel))
- payloadrdata = constructLeafRecompressWALData(buffer, leaf);
+ computeLeafRecompressWALData(leaf);
+
+ /* Apply changes to page */
START_CRIT_SECTION();
+
dataPlaceToPageLeafRecompress(buffer, leaf);
MarkBufferDirty(buffer);
@@ -827,18 +852,24 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
if (RelationNeedsWAL(indexrel))
{
XLogRecPtr recptr;
- XLogRecData rdata;
+ XLogRecData rdata[2];
ginxlogVacuumDataLeafPage xlrec;
xlrec.node = indexrel->rd_node;
xlrec.blkno = BufferGetBlockNumber(buffer);
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
- rdata.next = payloadrdata;
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &xlrec;
+ rdata[0].len = offsetof(ginxlogVacuumDataLeafPage, data);
+ rdata[0].next = &rdata[1];
+
+ rdata[1].buffer = buffer;
+ rdata[1].buffer_std = true;
+ rdata[1].data = leaf->walinfo;
+ rdata[1].len = leaf->walinfolen;
+ rdata[1].next = NULL;
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, rdata);
PageSetLSN(page, recptr);
}
@@ -848,15 +879,15 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
/*
* Construct a ginxlogRecompressDataLeaf record representing the changes
- * in *leaf.
+ * in *leaf. (Because this requires a palloc, we have to do it before
+ * we enter the critical section that actually updates the page.)
*/
-static XLogRecData *
-constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
+static void
+computeLeafRecompressWALData(disassembledLeaf *leaf)
{
int nmodified = 0;
char *walbufbegin;
char *walbufend;
- XLogRecData *rdata;
dlist_iter iter;
int segno;
ginxlogRecompressDataLeaf *recompress_xlog;
@@ -871,12 +902,11 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
nmodified++;
}
- walbufbegin = palloc(
- sizeof(ginxlogRecompressDataLeaf) +
- BLCKSZ + /* max size needed to hold the segment
- * data */
- nmodified * 2 + /* (segno + action) per action */
- sizeof(XLogRecData));
+ walbufbegin =
+ palloc(sizeof(ginxlogRecompressDataLeaf) +
+ BLCKSZ + /* max size needed to hold the segment data */
+ nmodified * 2 /* (segno + action) per action */
+ );
walbufend = walbufbegin;
recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
@@ -944,22 +974,15 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
segno++;
}
- rdata = (XLogRecData *) MAXALIGN(walbufend);
- rdata->buffer = buf;
- rdata->buffer_std = TRUE;
- rdata->data = walbufbegin;
- rdata->len = walbufend - walbufbegin;
- rdata->next = NULL;
-
- return rdata;
+ /* Pass back the constructed info via *leaf */
+ leaf->walinfo = walbufbegin;
+ leaf->walinfolen = walbufend - walbufbegin;
}
/*
* Assemble a disassembled posting tree leaf page back to a buffer.
*
- * *prdata is filled with WAL information about this operation. The caller
- * is responsible for inserting to the WAL, along with any other information
- * about the operation that triggered this recompression.
+ * This just updates the target buffer; WAL stuff is caller's responsibility.
*
* NOTE: The segment pointers must not point directly to the same buffer,
* except for segments that have not been modified and whose preceding
@@ -1018,13 +1041,14 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
* segments to two pages instead of one.
*
* This is different from the non-split cases in that this does not modify
- * the original page directly, but to temporary in-memory copies of the new
- * left and right pages.
+ * the original page directly, but writes to temporary in-memory copies of
+ * the new left and right pages. Also, we prepare rdata[] entries for the
+ * data that must be appended to the WAL record.
*/
static void
-dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
+dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage)
+ Page lpage, Page rpage, XLogRecData *rdata)
{
char *ptr;
int segsize;
@@ -1034,9 +1058,8 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
dlist_node *firstright;
leafSegmentInfo *seginfo;
- /* these must be static so they can be returned to caller */
+ /* this must be static so it can be returned to caller */
static ginxlogSplitDataLeaf split_xlog;
- static XLogRecData rdata[3];
/* Initialize temporary pages to hold the new left and right pages */
GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
@@ -1113,43 +1136,63 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
rdata[2].len = rsize;
rdata[2].next = NULL;
-
- *prdata = rdata;
}
/*
- * Place a PostingItem to page, and fill a WAL record.
+ * Prepare to insert data on an internal data page.
+ *
+ * If it will fit, return GPTP_INSERT after doing whatever setup is needed
+ * before we enter the insertion critical section. *ptp_workspace can be
+ * set to pass information along to the execPlaceToPage function.
*
- * If the item doesn't fit, returns false without modifying the page.
+ * If it won't fit, perform a page split and return two temporary page
+ * images into *newlpage and *newrpage, with result GPTP_SPLIT. Also,
+ * if WAL logging is needed, fill one or more entries of rdata[] with
+ * whatever data must be appended to the WAL record.
*
- * In addition to inserting the given item, the downlink of the existing item
- * at 'off' is updated to point to 'updateblkno'.
+ * In neither case should the given page buffer be modified here.
+ *
+ * Note: on insertion to an internal node, in addition to inserting the given
+ * item, the downlink of the existing item at stack->off will be updated to
+ * point to updateblkno.
*/
static GinPlaceToPageRC
-dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, BlockNumber updateblkno,
+ void **ptp_workspace,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
Page page = BufferGetPage(buf);
- OffsetNumber off = stack->off;
- PostingItem *pitem;
-
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata;
- static ginxlogInsertDataInternal data;
- /* split if we have to */
+ /* If it doesn't fit, deal with split case */
if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
{
dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
- prdata, newlpage, newrpage);
- return SPLIT;
+ newlpage, newrpage, rdata);
+ return GPTP_SPLIT;
}
- *prdata = &rdata;
- Assert(GinPageIsData(page));
+ /* Else, we're ready to proceed with insertion */
+ return GPTP_INSERT;
+}
- START_CRIT_SECTION();
+/*
+ * Perform data insertion after beginPlaceToPage has decided it will fit.
+ *
+ * This is invoked within a critical section. It must modify the target
+ * buffer and store one or more XLogRecData records describing the changes
+ * in rdata[].
+ */
+static void
+dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, BlockNumber updateblkno,
+ void *ptp_workspace,
+ XLogRecData *rdata)
+{
+ Page page = BufferGetPage(buf);
+ OffsetNumber off = stack->off;
+ PostingItem *pitem;
/* Update existing downlink to point to next page (on internal page) */
pitem = GinDataPageGetPostingItem(page, off);
@@ -1159,50 +1202,106 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- data.offset = off;
- data.newitem = *pitem;
+ if (RelationNeedsWAL(btree->index))
+ {
+ /*
+ * This must be static, because it has to survive until XLogInsert,
+ * and we can't palloc here. Ugly, but the XLogInsert infrastructure
+ * isn't reentrant anyway.
+ */
+ static ginxlogInsertDataInternal data;
- rdata.buffer = buf;
- rdata.buffer_std = TRUE;
- rdata.data = (char *) &data;
- rdata.len = sizeof(ginxlogInsertDataInternal);
- rdata.next = NULL;
+ data.offset = off;
+ data.newitem = *pitem;
- return INSERTED;
+ rdata[0].buffer = buf;
+ rdata[0].buffer_std = true;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogInsertDataInternal);
+ rdata[0].next = NULL;
+ }
}
/*
- * Places an item (or items) to a posting tree. Calls relevant function of
- * internal of leaf page because they are handled very differently.
+ * Prepare to insert data on a posting-tree data page.
+ *
+ * If it will fit, return GPTP_INSERT after doing whatever setup is needed
+ * before we enter the insertion critical section. *ptp_workspace can be
+ * set to pass information along to the execPlaceToPage function.
+ *
+ * If it won't fit, perform a page split and return two temporary page
+ * images into *newlpage and *newrpage, with result GPTP_SPLIT. Also,
+ * if WAL logging is needed, fill one or more entries of rdata[] with
+ * whatever data must be appended to the WAL record.
+ *
+ * In neither case should the given page buffer be modified here.
+ *
+ * Note: on insertion to an internal node, in addition to inserting the given
+ * item, the downlink of the existing item at stack->off will be updated to
+ * point to updateblkno.
+ *
+ * Calls relevant function for internal or leaf page because they are handled
+ * very differently.
*/
static GinPlaceToPageRC
-dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, BlockNumber updateblkno,
+ void **ptp_workspace,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
Page page = BufferGetPage(buf);
Assert(GinPageIsData(page));
if (GinPageIsLeaf(page))
- return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
- prdata, newlpage, newrpage);
+ return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
+ ptp_workspace,
+ newlpage, newrpage, rdata);
+ else
+ return dataBeginPlaceToPageInternal(btree, buf, stack,
+ insertdata, updateblkno,
+ ptp_workspace,
+ newlpage, newrpage, rdata);
+}
+
+/*
+ * Perform data insertion after beginPlaceToPage has decided it will fit.
+ *
+ * This is invoked within a critical section. It must modify the target
+ * buffer and store one or more XLogRecData records describing the changes
+ * in rdata[].
+ *
+ * Calls relevant function for internal or leaf page because they are handled
+ * very differently.
+ */
+static void
+dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertdata, BlockNumber updateblkno,
+ void *ptp_workspace,
+ XLogRecData *rdata)
+{
+ Page page = BufferGetPage(buf);
+
+ if (GinPageIsLeaf(page))
+ dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
+ ptp_workspace, rdata);
else
- return dataPlaceToPageInternal(btree, buf, stack,
- insertdata, updateblkno,
- prdata, newlpage, newrpage);
+ dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
+ updateblkno, ptp_workspace, rdata);
}
/*
- * Split page and fill WAL record. Returns a new temp buffer filled with data
- * that should go to the left page. The original buffer is left untouched.
+ * Split internal page and insert new data.
+ *
+ * Returns new temp pages to *newlpage and *newrpage.
+ * The original buffer is left untouched.
*/
static void
dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage, XLogRecData *rdata)
{
Page oldpage = BufferGetPage(origbuf);
OffsetNumber off = stack->off;
@@ -1218,7 +1317,6 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
/* these must be static so they can be returned to caller */
static ginxlogSplitDataInternal data;
- static XLogRecData rdata[4];
static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
lpage = PageGetTempPage(oldpage);
@@ -1226,8 +1324,6 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
- *prdata = rdata;
-
/*
* First construct a new list of PostingItems, which includes all the old
* items, and the new item.
@@ -1277,6 +1373,7 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
/* set up right bound for right page */
*GinDataPageGetRightBound(rpage) = oldbound;
+ /* Set up WAL data */
data.separator = separator;
data.nitem = nitems;
data.rightbound = oldbound;
@@ -1291,6 +1388,7 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
rdata[1].len = nitems * sizeof(PostingItem);
rdata[1].next = NULL;
+ /* return temp pages to caller */
*newlpage = lpage;
*newrpage = rpage;
}
@@ -1855,7 +1953,8 @@ ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
btree->isMoveRight = dataIsMoveRight;
btree->findItem = NULL;
btree->findChildPtr = dataFindChildPtr;
- btree->placeToPage = dataPlaceToPage;
+ btree->beginPlaceToPage = dataBeginPlaceToPage;
+ btree->execPlaceToPage = dataExecPlaceToPage;
btree->fillRoot = ginDataFillRoot;
btree->prepareDownlink = dataPrepareDownlink;