diff options
Diffstat (limited to 'src/backend/access/gin/ginbtree.c')
-rw-r--r-- | src/backend/access/gin/ginbtree.c | 203 |
1 files changed, 119 insertions, 84 deletions
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 27f88e0eb21..0e19795b232 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -16,6 +16,7 @@ #include "access/gin_private.h" #include "miscadmin.h" +#include "utils/memutils.h" #include "utils/rel.h" static void ginFindParents(GinBtree btree, GinBtreeStack *stack); @@ -309,15 +310,16 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack) * Insert a new item to a page. * * Returns true if the insertion was finished. On false, the page was split and - * the parent needs to be updated. (a root split returns true as it doesn't - * need any further action by the caller to complete) + * the parent needs to be updated. (A root split returns true as it doesn't + * need any further action by the caller to complete.) * - * When inserting a downlink to a internal page, 'childbuf' contains the + * When inserting a downlink to an internal page, 'childbuf' contains the * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared - * atomically with the insert. Also, the existing item at the given location - * is updated to point to 'updateblkno'. + * atomically with the insert. Also, the existing item at offset stack->off + * in the target page is updated to point to updateblkno. * * stack->buffer is locked on entry, and is kept locked. + * Likewise for childbuf, if given. */ static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, @@ -325,12 +327,29 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, Buffer childbuf, GinStatsData *buildStats) { Page page = BufferGetPage(stack->buffer); - XLogRecData *payloadrdata; + bool result; GinPlaceToPageRC rc; uint16 xlflags = 0; Page childpage = NULL; Page newlpage = NULL, newrpage = NULL; + void *ptp_workspace = NULL; + XLogRecData payloadrdata[10]; + MemoryContext tmpCxt; + MemoryContext oldCxt; + + /* + * We do all the work of this function and its subfunctions in a temporary + * memory context. This avoids leakages and simplifies APIs, since some + * subfunctions allocate storage that has to survive until we've finished + * the WAL insertion. + */ + tmpCxt = AllocSetContextCreate(CurrentMemoryContext, + "ginPlaceToPage temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldCxt = MemoryContextSwitchTo(tmpCxt); if (GinPageIsData(page)) xlflags |= GIN_INSERT_ISDATA; @@ -348,21 +367,36 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, } /* - * Try to put the incoming tuple on the page. placeToPage will decide if - * the page needs to be split. + * See if the incoming tuple will fit on the page. beginPlaceToPage will + * decide if the page needs to be split, and will compute the split + * contents if so. See comments for beginPlaceToPage and execPlaceToPage + * functions for more details of the API here. */ - rc = btree->placeToPage(btree, stack->buffer, stack, - insertdata, updateblkno, - &payloadrdata, &newlpage, &newrpage); - if (rc == UNMODIFIED) - return true; - else if (rc == INSERTED) + rc = btree->beginPlaceToPage(btree, stack->buffer, stack, + insertdata, updateblkno, + &ptp_workspace, + &newlpage, &newrpage, + payloadrdata); + + if (rc == GPTP_NO_WORK) { - /* placeToPage did START_CRIT_SECTION() */ + /* Nothing to do */ + result = true; + } + else if (rc == GPTP_INSERT) + { + /* It will fit, perform the insertion */ + START_CRIT_SECTION(); + + /* Perform the page update, and set up WAL data about it */ + btree->execPlaceToPage(btree, stack->buffer, stack, + insertdata, updateblkno, + ptp_workspace, payloadrdata); + MarkBufferDirty(stack->buffer); /* An insert to an internal page finishes the split of the child. */ - if (childbuf != InvalidBuffer) + if (BufferIsValid(childbuf)) { GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; MarkBufferDirty(childbuf); @@ -387,7 +421,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, * Log information about child if this was an insertion of a * downlink. */ - if (childbuf != InvalidBuffer) + if (BufferIsValid(childbuf)) { rdata[0].next = &rdata[1]; @@ -400,7 +434,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, rdata[1].next = &rdata[2]; rdata[2].buffer = childbuf; - rdata[2].buffer_std = false; + rdata[2].buffer_std = true; rdata[2].data = NULL; rdata[2].len = 0; rdata[2].next = payloadrdata; @@ -409,25 +443,31 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, rdata[0].next = payloadrdata; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata); + PageSetLSN(page, recptr); - if (childbuf != InvalidBuffer) + if (BufferIsValid(childbuf)) PageSetLSN(childpage, recptr); } END_CRIT_SECTION(); - return true; + /* Insertion is complete. */ + result = true; } - else if (rc == SPLIT) + else if (rc == GPTP_SPLIT) { - /* Didn't fit, have to split */ + /* + * Didn't fit, need to split. The split has been computed in newlpage + * and newrpage, which are pointers to palloc'd pages, not associated + * with buffers. stack->buffer is not touched yet. + */ Buffer rbuffer; BlockNumber savedRightLink; - XLogRecData rdata[2]; ginxlogSplit data; Buffer lbuffer = InvalidBuffer; Page newrootpg = NULL; + /* Get a new index page to become the right page */ rbuffer = GinNewBuffer(btree->index); /* During index build, count the new page */ @@ -441,52 +481,27 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, savedRightLink = GinPageGetOpaque(page)->rightlink; - /* - * newlpage and newrpage are pointers to memory pages, not associated - * with buffers. stack->buffer is not touched yet. - */ - + /* Begin setting up WAL record (which we might not use) */ data.node = btree->index->rd_node; data.rblkno = BufferGetBlockNumber(rbuffer); data.flags = xlflags; - if (childbuf != InvalidBuffer) + if (BufferIsValid(childbuf)) { - Page childpage = BufferGetPage(childbuf); - - GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; - data.leftChildBlkno = BufferGetBlockNumber(childbuf); data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink; } else data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber; - rdata[0].buffer = InvalidBuffer; - rdata[0].data = (char *) &data; - rdata[0].len = sizeof(ginxlogSplit); - - if (childbuf != InvalidBuffer) - { - rdata[0].next = &rdata[1]; - - rdata[1].buffer = childbuf; - rdata[1].buffer_std = false; - rdata[1].data = NULL; - rdata[1].len = 0; - rdata[1].next = payloadrdata; - } - else - rdata[0].next = payloadrdata; - if (stack->parent == NULL) { /* - * split root, so we need to allocate new left page and place - * pointer on root to left and right page + * splitting the root, so we need to allocate new left page and + * place pointers to left and right page on root page. */ lbuffer = GinNewBuffer(btree->index); - /* During index build, count the newly-added root page */ + /* During index build, count the new left page */ if (buildStats) { if (btree->isData) @@ -508,9 +523,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, /* * Construct a new root page containing downlinks to the new left - * and right pages. (do this in a temporary copy first rather than - * overwriting the original page directly, so that we can still - * abort gracefully if this fails.) + * and right pages. (Do this in a temporary copy rather than + * overwriting the original page directly, since we're not in the + * critical section yet.) */ newrootpg = PageGetTempPage(newrpage); GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ); @@ -521,7 +536,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, } else { - /* split non-root page */ + /* splitting a non-root page */ data.rrlink = savedRightLink; data.lblkno = BufferGetBlockNumber(stack->buffer); @@ -531,48 +546,70 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, } /* - * Ok, we have the new contents of the left page in a temporary copy - * now (newlpage), and the newly-allocated right block has been filled - * in. The original page is still unchanged. + * OK, we have the new contents of the left page in a temporary copy + * now (newlpage), and likewise for the new contents of the + * newly-allocated right block. The original page is still unchanged. * * If this is a root split, we also have a temporary page containing - * the new contents of the root. Copy the new left page to a - * newly-allocated block, and initialize the (original) root page the - * new copy. Otherwise, copy over the temporary copy of the new left - * page over the old left page. + * the new contents of the root. */ START_CRIT_SECTION(); MarkBufferDirty(rbuffer); MarkBufferDirty(stack->buffer); - if (BufferIsValid(childbuf)) - MarkBufferDirty(childbuf); /* - * Restore the temporary copies over the real buffers. But don't free - * the temporary copies yet, WAL record data points to them. + * Restore the temporary copies over the real buffers. */ if (stack->parent == NULL) { + /* Splitting the root, three pages to update */ MarkBufferDirty(lbuffer); - memcpy(BufferGetPage(stack->buffer), newrootpg, BLCKSZ); + memcpy(page, newrootpg, BLCKSZ); memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ); memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ); } else { - memcpy(BufferGetPage(stack->buffer), newlpage, BLCKSZ); + /* Normal split, only two pages to update */ + memcpy(page, newlpage, BLCKSZ); memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ); } + /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */ + if (BufferIsValid(childbuf)) + { + GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; + MarkBufferDirty(childbuf); + } + /* write WAL record */ if (RelationNeedsWAL(btree->index)) { + XLogRecData rdata[2]; XLogRecPtr recptr; + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *) &data; + rdata[0].len = sizeof(ginxlogSplit); + + if (BufferIsValid(childbuf)) + { + rdata[0].next = &rdata[1]; + + rdata[1].buffer = childbuf; + rdata[1].buffer_std = true; + rdata[1].data = NULL; + rdata[1].len = 0; + rdata[1].next = payloadrdata; + } + else + rdata[0].next = payloadrdata; + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); - PageSetLSN(BufferGetPage(stack->buffer), recptr); + + PageSetLSN(page, recptr); PageSetLSN(BufferGetPage(rbuffer), recptr); if (stack->parent == NULL) PageSetLSN(BufferGetPage(lbuffer), recptr); @@ -582,33 +619,31 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, END_CRIT_SECTION(); /* - * We can release the lock on the right page now, but keep the - * original buffer locked. + * We can release the locks/pins on the new pages now, but keep + * stack->buffer locked. childbuf doesn't get unlocked either. */ UnlockReleaseBuffer(rbuffer); if (stack->parent == NULL) UnlockReleaseBuffer(lbuffer); - pfree(newlpage); - pfree(newrpage); - if (newrootpg) - pfree(newrootpg); - /* * If we split the root, we're done. Otherwise the split is not * complete until the downlink for the new page has been inserted to * the parent. */ - if (stack->parent == NULL) - return true; - else - return false; + result = (stack->parent == NULL); } else { - elog(ERROR, "unknown return code from GIN placeToPage method: %d", rc); - return false; /* keep compiler quiet */ + elog(ERROR, "invalid return code from GIN placeToPage method: %d", rc); + result = false; /* keep compiler quiet */ } + + /* Clean up temp context */ + MemoryContextSwitchTo(oldCxt); + MemoryContextDelete(tmpCxt); + + return result; } /* |