diff options
Diffstat (limited to 'src/backend/access/gin/ginbtree.c')
-rw-r--r-- | src/backend/access/gin/ginbtree.c | 182 |
1 files changed, 113 insertions, 69 deletions
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 7248b06326f..f3c0dfd128f 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -19,7 +19,7 @@ #include "utils/rel.h" /* - * Locks buffer by needed method for search. + * Lock buffer by needed method for search. */ static int ginTraverseLock(Buffer buffer, bool searchMode) @@ -53,9 +53,9 @@ ginTraverseLock(Buffer buffer, bool searchMode) } /* - * Descends the tree to the leaf page that contains or would contain the - * key we're searching for. The key should already be filled in 'btree', - * in tree-type specific manner. If btree->fullScan is true, descends to the + * Descend the tree to the leaf page that contains or would contain the key + * we're searching for. The key should already be filled in 'btree', in + * tree-type specific manner. If btree->fullScan is true, descends to the * leftmost leaf page. * * If 'searchmode' is false, on return stack->buffer is exclusively locked, @@ -63,13 +63,13 @@ ginTraverseLock(Buffer buffer, bool searchMode) * is share-locked, and stack->parent is NULL. */ GinBtreeStack * -ginFindLeafPage(GinBtree btree, BlockNumber rootBlkno, bool searchMode) +ginFindLeafPage(GinBtree btree, bool searchMode) { GinBtreeStack *stack; stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack)); - stack->blkno = rootBlkno; - stack->buffer = ReadBuffer(btree->index, rootBlkno); + stack->blkno = btree->rootBlkno; + stack->buffer = ReadBuffer(btree->index, btree->rootBlkno); stack->parent = NULL; stack->predictNumber = 1; @@ -89,7 +89,7 @@ ginFindLeafPage(GinBtree btree, BlockNumber rootBlkno, bool searchMode) * ok, page is correctly locked, we should check to move right .., * root never has a right link, so small optimization */ - while (btree->fullScan == FALSE && stack->blkno != rootBlkno && + while (btree->fullScan == FALSE && stack->blkno != btree->rootBlkno && btree->isMoveRight(btree, page)) { BlockNumber rightlink = GinPageGetOpaque(page)->rightlink; @@ -146,7 +146,7 @@ ginStepRight(Buffer buffer, Relation index, int lockmode) Page page = BufferGetPage(buffer); bool isLeaf = GinPageIsLeaf(page); bool isData = GinPageIsData(page); - BlockNumber blkno = GinPageGetOpaque(page)->rightlink; + BlockNumber blkno = GinPageGetOpaque(page)->rightlink; nextbuffer = ReadBuffer(index, blkno); LockBuffer(nextbuffer, lockmode); @@ -158,10 +158,10 @@ ginStepRight(Buffer buffer, Relation index, int lockmode) elog(ERROR, "right sibling of GIN page is of different type"); /* - * Given the proper lock sequence above, we should never land on a - * deleted page. + * Given the proper lock sequence above, we should never land on a deleted + * page. */ - if (GinPageIsDeleted(page)) + if (GinPageIsDeleted(page)) elog(ERROR, "right sibling of GIN page was deleted"); return nextbuffer; @@ -183,14 +183,12 @@ freeGinBtreeStack(GinBtreeStack *stack) } /* - * Try to find parent for current stack position, returns correct - * parent and child's offset in stack->parent. - * Function should never release root page to prevent conflicts - * with vacuum process + * Try to find parent for current stack position. Returns correct parent and + * child's offset in stack->parent. The root page is never released, to + * to prevent conflict with vacuum process. */ void -ginFindParents(GinBtree btree, GinBtreeStack *stack, - BlockNumber rootBlkno) +ginFindParents(GinBtree btree, GinBtreeStack *stack) { Page page; Buffer buffer; @@ -204,8 +202,8 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack, { /* XLog mode... */ root = (GinBtreeStack *) palloc(sizeof(GinBtreeStack)); - root->blkno = rootBlkno; - root->buffer = ReadBuffer(btree->index, rootBlkno); + root->blkno = btree->rootBlkno; + root->buffer = ReadBuffer(btree->index, btree->rootBlkno); LockBuffer(root->buffer, GIN_EXCLUSIVE); root->parent = NULL; } @@ -221,8 +219,8 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack, root = root->parent; } - Assert(root->blkno == rootBlkno); - Assert(BufferGetBlockNumber(root->buffer) == rootBlkno); + Assert(root->blkno == btree->rootBlkno); + Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno); LockBuffer(root->buffer, GIN_EXCLUSIVE); } root->off = InvalidOffsetNumber; @@ -268,7 +266,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack, ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack)); ptr->blkno = blkno; ptr->buffer = buffer; - ptr->parent = root; /* it's may be wrong, but in next call we will + ptr->parent = root; /* it may be wrong, but in next call we will * correct */ ptr->off = offset; stack->parent = ptr; @@ -280,21 +278,35 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack, } /* - * Returns true if the insertion is done, false if the page was split and - * downlink insertion is pending. + * Insert a new item to a page. + * + * Returns true if the insertion was finished. On false, the page was split and + * the parent needs to be updated. (a root split returns true as it doesn't + * need any further action by the caller to complete) + * + * When inserting a downlink to a internal page, the existing item at the + * given location is updated to point to 'updateblkno'. * * stack->buffer is locked on entry, and is kept locked. */ static bool -ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack, +ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, + void *insertdata, BlockNumber updateblkno, GinStatsData *buildStats) { Page page = BufferGetPage(stack->buffer); XLogRecData *rdata; bool fit; + /* + * Try to put the incoming tuple on the page. If it doesn't fit, + * placeToPage method will return false and leave the page unmodified, and + * we'll have to split the page. + */ START_CRIT_SECTION(); - fit = btree->placeToPage(btree, stack->buffer, stack->off, &rdata); + fit = btree->placeToPage(btree, stack->buffer, stack->off, + insertdata, updateblkno, + &rdata); if (fit) { MarkBufferDirty(stack->buffer); @@ -324,18 +336,7 @@ ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack, END_CRIT_SECTION(); rbuffer = GinNewBuffer(btree->index); - - savedRightLink = GinPageGetOpaque(page)->rightlink; - - /* - * newlpage is a pointer to memory page, it is not associated with - * a buffer. stack->buffer is not touched yet. - */ - newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata); - - ((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno; - - /* During index build, count the newly-split page */ + /* During index build, count the new page */ if (buildStats) { if (btree->isData) @@ -344,6 +345,18 @@ ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack, buildStats->nEntryPages++; } + savedRightLink = GinPageGetOpaque(page)->rightlink; + + /* + * newlpage is a pointer to memory page, it is not associated with a + * buffer. stack->buffer is not touched yet. + */ + newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, + insertdata, updateblkno, + &rdata); + + ((ginxlogSplit *) (rdata->data))->rootBlkno = btree->rootBlkno; + parent = stack->parent; if (parent == NULL) @@ -354,6 +367,15 @@ ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack, */ Buffer lbuffer = GinNewBuffer(btree->index); + /* During index build, count the new page */ + if (buildStats) + { + if (btree->isData) + buildStats->nDataPages++; + else + buildStats->nEntryPages++; + } + ((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE; ((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber; @@ -434,46 +456,27 @@ ginPlaceToPage(GinBtree btree, BlockNumber rootBlkno, GinBtreeStack *stack, } /* - * Insert value (stored in GinBtree) to tree described by stack + * Finish a split by inserting the downlink for the new page to parent. * - * During an index build, buildStats is non-null and the counters - * it contains are incremented as needed. + * On entry, stack->buffer is exclusively locked. * * NB: the passed-in stack is freed, as though by freeGinBtreeStack. */ void -ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) +ginFinishSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) { - GinBtreeStack *parent; - BlockNumber rootBlkno; Page page; - - /* extract root BlockNumber from stack */ - Assert(stack != NULL); - parent = stack; - while (parent->parent) - parent = parent->parent; - rootBlkno = parent->blkno; - Assert(BlockNumberIsValid(rootBlkno)); + bool done; /* this loop crawls up the stack until the insertion is complete */ - for (;;) + do { - bool done; - - done = ginPlaceToPage(btree, rootBlkno, stack, buildStats); - - /* just to be extra sure we don't delete anything by accident... */ - btree->isDelete = FALSE; - - if (done) - { - LockBuffer(stack->buffer, GIN_UNLOCK); - freeGinBtreeStack(stack); - break; - } + GinBtreeStack *parent = stack->parent; + void *insertdata; + BlockNumber updateblkno; - btree->prepareDownlink(btree, stack->buffer); + insertdata = btree->prepareDownlink(btree, stack->buffer); + updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink; /* search parent to lock */ LockBuffer(parent->buffer, GIN_EXCLUSIVE); @@ -491,7 +494,7 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) * plain search... */ LockBuffer(parent->buffer, GIN_UNLOCK); - ginFindParents(btree, stack, rootBlkno); + ginFindParents(btree, stack); parent = stack->parent; Assert(parent != NULL); break; @@ -502,8 +505,49 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) page = BufferGetPage(parent->buffer); } + /* release the child */ UnlockReleaseBuffer(stack->buffer); pfree(stack); stack = parent; + + /* insert the downlink to parent */ + done = ginPlaceToPage(btree, stack, + insertdata, updateblkno, + buildStats); + pfree(insertdata); + } while (!done); + LockBuffer(stack->buffer, GIN_UNLOCK); + + /* free the rest of the stack */ + freeGinBtreeStack(stack); +} + +/* + * Insert a value to tree described by stack. + * + * The value to be inserted is given in 'insertdata'. Its format depends + * on whether this is an entry or data tree, ginInsertValue just passes it + * through to the tree-specific callback function. + * + * During an index build, buildStats is non-null and the counters it contains + * are incremented as needed. + * + * NB: the passed-in stack is freed, as though by freeGinBtreeStack. + */ +void +ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, + GinStatsData *buildStats) +{ + bool done; + + done = ginPlaceToPage(btree, stack, + insertdata, InvalidBlockNumber, + buildStats); + if (done) + { + LockBuffer(stack->buffer, GIN_UNLOCK); + freeGinBtreeStack(stack); } + else + ginFinishSplit(btree, stack, buildStats); } |