summaryrefslogtreecommitdiff
path: root/src/backend/access/gin/ginentrypage.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2016-04-20 14:25:15 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2016-04-20 14:25:15 -0400
commitef35afa35c422928d8fb900dd69cfc182f076bf0 (patch)
tree0a22e67ee8f01c113a22b530f89a1168b10595ba /src/backend/access/gin/ginentrypage.c
parent21b7f49eb88a6d39acf9569cddf65f1985318056 (diff)
Fix memory leak and other bugs in ginPlaceToPage() & subroutines.
Commit 36a35c550ac114ca turned the interface between ginPlaceToPage and its subroutines in gindatapage.c and ginentrypage.c into a royal mess: page-update critical sections were started in one place and finished in another place not even in the same file, and the very same subroutine might return having started a critical section or not. Subsequent patches band-aided over some of the problems with this design by making things even messier. One user-visible resulting problem is memory leaks caused by the need for the subroutines to allocate storage that would survive until ginPlaceToPage calls XLogInsert (as reported by Julien Rouhaud). This would not typically be noticeable during retail index updates. It could be visible in a GIN index build, in the form of memory consumption swelling to several times the commanded maintenance_work_mem. Another rather nasty problem is that in the internal-page-splitting code path, we would clear the child page's GIN_INCOMPLETE_SPLIT flag well before entering the critical section that it's supposed to be cleared in; a failure in between would leave the index in a corrupt state. There were also assorted coding-rule violations with little immediate consequence but possible long-term hazards, such as beginning an XLogInsert sequence before entering a critical section, or calling elog(DEBUG) inside a critical section. To fix, redefine the API between ginPlaceToPage() and its subroutines by splitting the subroutines into two parts. The "beginPlaceToPage" subroutine does what can be done outside a critical section, including full computation of the result pages into temporary storage when we're going to split the target page. The "execPlaceToPage" subroutine is called within a critical section established by ginPlaceToPage(), and it handles the actual page update in the non-split code path. The critical section, as well as the XLOG insertion call sequence, are both now always started and finished in ginPlaceToPage(). Also, make ginPlaceToPage() create and work in a short-lived memory context to eliminate the leakage problem. (Since a short-lived memory context had been getting created in the most common code path in the subroutines, this shouldn't cause any noticeable performance penalty; we're just moving the overhead up one call level.) In passing, fix a bunch of comments that had gone unmaintained throughout all this klugery. Report: <571276DD.5050303@dalibo.com>
Diffstat (limited to 'src/backend/access/gin/ginentrypage.c')
-rw-r--r--src/backend/access/gin/ginentrypage.c140
1 files changed, 88 insertions, 52 deletions
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 412f90da4db..bdf1f2e5889 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -20,9 +20,10 @@
static void entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
- void *insertPayload,
- BlockNumber updateblkno, XLogRecData **prdata,
- Page *newlpage, Page *newrpage);
+ GinBtreeEntryInsertData *insertData,
+ BlockNumber updateblkno,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata);
/*
* Form a tuple for entry tree.
@@ -507,40 +508,63 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
}
/*
- * Place tuple on page and fills WAL record
+ * Prepare to insert data on an entry page.
*
- * If the tuple doesn't fit, returns false without modifying the page.
+ * If it will fit, return GPTP_INSERT after doing whatever setup is needed
+ * before we enter the insertion critical section. *ptp_workspace can be
+ * set to pass information along to the execPlaceToPage function.
*
- * On insertion to an internal node, in addition to inserting the given item,
- * the downlink of the existing item at 'off' is updated to point to
- * 'updateblkno'.
+ * If it won't fit, perform a page split and return two temporary page
+ * images into *newlpage and *newrpage, with result GPTP_SPLIT. Also,
+ * if WAL logging is needed, fill one or more entries of rdata[] with
+ * whatever data must be appended to the WAL record.
+ *
+ * In neither case should the given page buffer be modified here.
+ *
+ * Note: on insertion to an internal node, in addition to inserting the given
+ * item, the downlink of the existing item at stack->off will be updated to
+ * point to updateblkno.
*/
static GinPlaceToPageRC
-entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertPayload, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+entryBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertPayload, BlockNumber updateblkno,
+ void **ptp_workspace,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
GinBtreeEntryInsertData *insertData = insertPayload;
- Page page = BufferGetPage(buf);
OffsetNumber off = stack->off;
- OffsetNumber placed;
- int cnt = 0;
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata[3];
- static ginxlogInsertEntry data;
-
- /* quick exit if it doesn't fit */
+ /* If it doesn't fit, deal with split case */
if (!entryIsEnoughSpace(btree, buf, off, insertData))
{
- entrySplitPage(btree, buf, stack, insertPayload, updateblkno,
- prdata, newlpage, newrpage);
- return SPLIT;
+ entrySplitPage(btree, buf, stack, insertData, updateblkno,
+ newlpage, newrpage, rdata);
+ return GPTP_SPLIT;
}
- START_CRIT_SECTION();
+ /* Else, we're ready to proceed with insertion */
+ return GPTP_INSERT;
+}
+
+/*
+ * Perform data insertion after beginPlaceToPage has decided it will fit.
+ *
+ * This is invoked within a critical section. It must modify the target
+ * buffer and store one or more XLogRecData records describing the changes
+ * in rdata[].
+ */
+static void
+entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
+ void *insertPayload, BlockNumber updateblkno,
+ void *ptp_workspace,
+ XLogRecData *rdata)
+{
+ GinBtreeEntryInsertData *insertData = insertPayload;
+ Page page = BufferGetPage(buf);
+ OffsetNumber off = stack->off;
+ OffsetNumber placed;
- *prdata = rdata;
entryPreparePage(btree, page, off, insertData, updateblkno);
placed = PageAddItem(page,
@@ -551,39 +575,47 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- data.isDelete = insertData->isDelete;
- data.offset = off;
-
- rdata[cnt].buffer = buf;
- rdata[cnt].buffer_std = true;
- rdata[cnt].data = (char *) &data;
- rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
- rdata[cnt].next = &rdata[cnt + 1];
- cnt++;
-
- rdata[cnt].buffer = buf;
- rdata[cnt].buffer_std = true;
- rdata[cnt].data = (char *) insertData->entry;
- rdata[cnt].len = IndexTupleSize(insertData->entry);
- rdata[cnt].next = NULL;
-
- return INSERTED;
+ if (RelationNeedsWAL(btree->index))
+ {
+ /*
+ * This must be static, because it has to survive until XLogInsert,
+ * and we can't palloc here. Ugly, but the XLogInsert infrastructure
+ * isn't reentrant anyway.
+ */
+ static ginxlogInsertEntry data;
+
+ data.isDelete = insertData->isDelete;
+ data.offset = off;
+
+ rdata[0].buffer = buf;
+ rdata[0].buffer_std = true;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = offsetof(ginxlogInsertEntry, tuple);
+ rdata[0].next = &rdata[1];
+
+ rdata[1].buffer = buf;
+ rdata[1].buffer_std = true;
+ rdata[1].data = (char *) insertData->entry;
+ rdata[1].len = IndexTupleSize(insertData->entry);
+ rdata[1].next = NULL;
+ }
}
/*
- * Place tuple and split page, original buffer(lbuf) leaves untouched,
- * returns shadow pages filled with new data.
- * Tuples are distributed between pages by equal size on its, not
- * an equal number!
+ * Split entry page and insert new data.
+ *
+ * Returns new temp pages to *newlpage and *newrpage.
+ * The original buffer is left untouched.
+ * Also, set up rdata[] entries describing data to be appended to WAL record.
*/
static void
entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
- void *insertPayload,
- BlockNumber updateblkno, XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+ GinBtreeEntryInsertData *insertData,
+ BlockNumber updateblkno,
+ Page *newlpage, Page *newrpage,
+ XLogRecData *rdata)
{
- GinBtreeEntryInsertData *insertData = insertPayload;
OffsetNumber off = stack->off;
OffsetNumber i,
maxoff,
@@ -600,11 +632,9 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
Size pageSize = PageGetPageSize(lpage);
/* these must be static so they can be returned to caller */
- static XLogRecData rdata[2];
static ginxlogSplitEntry data;
static char tupstore[2 * BLCKSZ];
- *prdata = rdata;
entryPreparePage(btree, lpage, off, insertData, updateblkno);
/*
@@ -655,6 +685,10 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
{
itup = (IndexTuple) ptr;
+ /*
+ * Decide where to split. We try to equalize the pages' total data
+ * size, not number of tuples.
+ */
if (lsize > totalsize / 2)
{
if (separator == InvalidOffsetNumber)
@@ -685,6 +719,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
rdata[1].len = tupstoresize;
rdata[1].next = NULL;
+ /* return temp pages to caller */
*newlpage = lpage;
*newrpage = rpage;
}
@@ -753,7 +788,8 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
btree->isMoveRight = entryIsMoveRight;
btree->findItem = entryLocateLeafEntry;
btree->findChildPtr = entryFindChildPtr;
- btree->placeToPage = entryPlaceToPage;
+ btree->beginPlaceToPage = entryBeginPlaceToPage;
+ btree->execPlaceToPage = entryExecPlaceToPage;
btree->fillRoot = ginEntryFillRoot;
btree->prepareDownlink = entryPrepareDownlink;