diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2016-04-20 14:25:15 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2016-04-20 14:25:15 -0400 |
commit | bde361fef5ea3c65074a0c95c724fae5ac8a1bb5 (patch) | |
tree | c4afbd5713553e1596aeee77c916cdd8fb7f6bf2 /src/backend/access/gin/ginentrypage.c | |
parent | a343e223a5c33a7283a6d8b255c9dbc48dbc5061 (diff) |
Fix memory leak and other bugs in ginPlaceToPage() & subroutines.
Commit 36a35c550ac114ca turned the interface between ginPlaceToPage and
its subroutines in gindatapage.c and ginentrypage.c into a royal mess:
page-update critical sections were started in one place and finished in
another place not even in the same file, and the very same subroutine
might return having started a critical section or not. Subsequent patches
band-aided over some of the problems with this design by making things
even messier.
One user-visible resulting problem is memory leaks caused by the need for
the subroutines to allocate storage that would survive until ginPlaceToPage
calls XLogInsert (as reported by Julien Rouhaud). This would not typically
be noticeable during retail index updates. It could be visible in a GIN
index build, in the form of memory consumption swelling to several times
the commanded maintenance_work_mem.
Another rather nasty problem is that in the internal-page-splitting code
path, we would clear the child page's GIN_INCOMPLETE_SPLIT flag well before
entering the critical section that it's supposed to be cleared in; a
failure in between would leave the index in a corrupt state. There were
also assorted coding-rule violations with little immediate consequence but
possible long-term hazards, such as beginning an XLogInsert sequence before
entering a critical section, or calling elog(DEBUG) inside a critical
section.
To fix, redefine the API between ginPlaceToPage() and its subroutines
by splitting the subroutines into two parts. The "beginPlaceToPage"
subroutine does what can be done outside a critical section, including
full computation of the result pages into temporary storage when we're
going to split the target page. The "execPlaceToPage" subroutine is called
within a critical section established by ginPlaceToPage(), and it handles
the actual page update in the non-split code path. The critical section,
as well as the XLOG insertion call sequence, are both now always started
and finished in ginPlaceToPage(). Also, make ginPlaceToPage() create and
work in a short-lived memory context to eliminate the leakage problem.
(Since a short-lived memory context had been getting created in the most
common code path in the subroutines, this shouldn't cause any noticeable
performance penalty; we're just moving the overhead up one call level.)
In passing, fix a bunch of comments that had gone unmaintained throughout
all this klugery.
Report: <571276DD.5050303@dalibo.com>
Diffstat (limited to 'src/backend/access/gin/ginentrypage.c')
-rw-r--r-- | src/backend/access/gin/ginentrypage.c | 88 |
1 files changed, 57 insertions, 31 deletions
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index 25127456480..8c0bfe9fdeb 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -21,7 +21,7 @@ static void entrySplitPage(GinBtree btree, Buffer origbuf, GinBtreeStack *stack, - void *insertPayload, + GinBtreeEntryInsertData *insertData, BlockNumber updateblkno, Page *newlpage, Page *newrpage); @@ -508,39 +508,57 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off, } /* - * Place tuple on page and fills WAL record + * Prepare to insert data on an entry page. * - * If the tuple doesn't fit, returns false without modifying the page. + * If it will fit, return GPTP_INSERT after doing whatever setup is needed + * before we enter the insertion critical section. *ptp_workspace can be + * set to pass information along to the execPlaceToPage function. * - * On insertion to an internal node, in addition to inserting the given item, - * the downlink of the existing item at 'off' is updated to point to - * 'updateblkno'. + * If it won't fit, perform a page split and return two temporary page + * images into *newlpage and *newrpage, with result GPTP_SPLIT. * - * On INSERTED, registers the buffer as buffer ID 0, with data. - * On SPLIT, returns rdata that represents the split pages in *prdata. + * In neither case should the given page buffer be modified here. + * + * Note: on insertion to an internal node, in addition to inserting the given + * item, the downlink of the existing item at stack->off will be updated to + * point to updateblkno. */ static GinPlaceToPageRC -entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, - void *insertPayload, BlockNumber updateblkno, - Page *newlpage, Page *newrpage) +entryBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertPayload, BlockNumber updateblkno, + void **ptp_workspace, + Page *newlpage, Page *newrpage) { GinBtreeEntryInsertData *insertData = insertPayload; - Page page = BufferGetPage(buf); OffsetNumber off = stack->off; - OffsetNumber placed; - /* this must be static so it can be returned to caller. */ - static ginxlogInsertEntry data; - - /* quick exit if it doesn't fit */ + /* If it doesn't fit, deal with split case */ if (!entryIsEnoughSpace(btree, buf, off, insertData)) { - entrySplitPage(btree, buf, stack, insertPayload, updateblkno, + entrySplitPage(btree, buf, stack, insertData, updateblkno, newlpage, newrpage); - return SPLIT; + return GPTP_SPLIT; } - START_CRIT_SECTION(); + /* Else, we're ready to proceed with insertion */ + return GPTP_INSERT; +} + +/* + * Perform data insertion after beginPlaceToPage has decided it will fit. + * + * This is invoked within a critical section, and XLOG record creation (if + * needed) is already started. The target buffer is registered in slot 0. + */ +static void +entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, + void *insertPayload, BlockNumber updateblkno, + void *ptp_workspace) +{ + GinBtreeEntryInsertData *insertData = insertPayload; + Page page = BufferGetPage(buf); + OffsetNumber off = stack->off; + OffsetNumber placed; entryPreparePage(btree, page, off, insertData, updateblkno); @@ -554,34 +572,36 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, if (RelationNeedsWAL(btree->index)) { + /* + * This must be static, because it has to survive until XLogInsert, + * and we can't palloc here. Ugly, but the XLogInsert infrastructure + * isn't reentrant anyway. + */ + static ginxlogInsertEntry data; + data.isDelete = insertData->isDelete; data.offset = off; - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) &data, offsetof(ginxlogInsertEntry, tuple)); XLogRegisterBufData(0, (char *) insertData->entry, IndexTupleSize(insertData->entry)); } - - return INSERTED; } /* - * Place tuple and split page, original buffer(lbuf) leaves untouched, - * returns shadow pages filled with new data. - * Tuples are distributed between pages by equal size on its, not - * an equal number! + * Split entry page and insert new data. + * + * Returns new temp pages to *newlpage and *newrpage. + * The original buffer is left untouched. */ static void entrySplitPage(GinBtree btree, Buffer origbuf, GinBtreeStack *stack, - void *insertPayload, + GinBtreeEntryInsertData *insertData, BlockNumber updateblkno, Page *newlpage, Page *newrpage) { - GinBtreeEntryInsertData *insertData = insertPayload; OffsetNumber off = stack->off; OffsetNumber i, maxoff, @@ -646,6 +666,10 @@ entrySplitPage(GinBtree btree, Buffer origbuf, { itup = (IndexTuple) ptr; + /* + * Decide where to split. We try to equalize the pages' total data + * size, not number of tuples. + */ if (lsize > totalsize / 2) { if (separator == InvalidOffsetNumber) @@ -663,6 +687,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, ptr += MAXALIGN(IndexTupleSize(itup)); } + /* return temp pages to caller */ *newlpage = lpage; *newrpage = rpage; } @@ -731,7 +756,8 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, btree->isMoveRight = entryIsMoveRight; btree->findItem = entryLocateLeafEntry; btree->findChildPtr = entryFindChildPtr; - btree->placeToPage = entryPlaceToPage; + btree->beginPlaceToPage = entryBeginPlaceToPage; + btree->execPlaceToPage = entryExecPlaceToPage; btree->fillRoot = ginEntryFillRoot; btree->prepareDownlink = entryPrepareDownlink; |