diff options
Diffstat (limited to 'src/backend/access/gin/ginbtree.c')
-rw-r--r-- | src/backend/access/gin/ginbtree.c | 77 |
1 files changed, 59 insertions, 18 deletions
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 39e2c07ad3d..86f938686c3 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -19,6 +19,7 @@ #include "access/xloginsert.h" #include "miscadmin.h" #include "storage/predicate.h" +#include "utils/injection_point.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -28,6 +29,8 @@ static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, Buffer childbuf, GinStatsData *buildStats); static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, GinStatsData *buildStats); +static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, + GinStatsData *buildStats, int access); /* * Lock buffer by needed method for search. @@ -108,7 +111,7 @@ ginFindLeafPage(GinBtree btree, bool searchMode, * encounter on the way. */ if (!searchMode && GinPageIsIncompleteSplit(page)) - ginFinishSplit(btree, stack, false, NULL); + ginFinishOldSplit(btree, stack, NULL, access); /* * ok, page is correctly locked, we should check to move right .., @@ -128,7 +131,7 @@ ginFindLeafPage(GinBtree btree, bool searchMode, page = BufferGetPage(stack->buffer); if (!searchMode && GinPageIsIncompleteSplit(page)) - ginFinishSplit(btree, stack, false, NULL); + ginFinishOldSplit(btree, stack, NULL, access); } if (GinPageIsLeaf(page)) /* we found, return locked page */ @@ -164,8 +167,11 @@ ginFindLeafPage(GinBtree btree, bool searchMode, * Step right from current page. * * The next page is locked first, before releasing the current page. This is - * crucial to protect from concurrent page deletion (see comment in - * ginDeletePage). + * crucial to prevent concurrent VACUUM from deleting a page that we are about + * to step to. (The lock-coupling isn't strictly necessary when we are + * traversing the tree to find an insert location, because page deletion grabs + * a cleanup lock on the root to prevent any concurrent inserts. See Page + * deletion section in the README. But there's no harm in doing it always.) */ Buffer ginStepRight(Buffer buffer, Relation index, int lockmode) @@ -262,7 +268,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack) ptr->parent = root; ptr->off = InvalidOffsetNumber; - ginFinishSplit(btree, ptr, false, NULL); + ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE); } leftmostBlkno = btree->getLeftMostChild(btree, page); @@ -291,7 +297,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack) ptr->parent = root; ptr->off = InvalidOffsetNumber; - ginFinishSplit(btree, ptr, false, NULL); + ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE); } } @@ -670,15 +676,6 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, bool done; bool first = true; - /* - * freestack == false when we encounter an incompletely split page during - * a scan, while freestack == true is used in the normal scenario that a - * split is finished right after the initial insert. - */ - if (!freestack) - elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"", - stack->blkno, RelationGetRelationName(btree->index)); - /* this loop crawls up the stack until the insertion is complete */ do { @@ -686,6 +683,13 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, void *insertdata; BlockNumber updateblkno; +#ifdef USE_INJECTION_POINTS + if (GinPageIsLeaf(BufferGetPage(stack->buffer))) + INJECTION_POINT("gin-leave-leaf-split-incomplete"); + else + INJECTION_POINT("gin-leave-internal-split-incomplete"); +#endif + /* search parent to lock */ LockBuffer(parent->buffer, GIN_EXCLUSIVE); @@ -699,7 +703,7 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, * would fail. */ if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer))) - ginFinishSplit(btree, parent, false, buildStats); + ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE); /* move right if it's needed */ page = BufferGetPage(parent->buffer); @@ -723,7 +727,7 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, page = BufferGetPage(parent->buffer); if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer))) - ginFinishSplit(btree, parent, false, buildStats); + ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE); } /* insert the downlink */ @@ -760,6 +764,43 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, } /* + * An entry point to ginFinishSplit() that is used when we stumble upon an + * existing incompletely split page in the tree, as opposed to completing a + * split that we just made outselves. The difference is that stack->buffer may + * be merely share-locked on entry, and will be upgraded to exclusive mode. + * + * Note: Upgrading the lock momentarily releases it. Doing that in a scan + * would not be OK, because a concurrent VACUUM might delete the page while + * we're not holding the lock. It's OK in an insert, though, because VACUUM + * has a different mechanism that prevents it from running concurrently with + * inserts. (Namely, it holds a cleanup lock on the root.) + */ +static void +ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access) +{ + INJECTION_POINT("gin-finish-incomplete-split"); + elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"", + stack->blkno, RelationGetRelationName(btree->index)); + + if (access == GIN_SHARE) + { + LockBuffer(stack->buffer, GIN_UNLOCK); + LockBuffer(stack->buffer, GIN_EXCLUSIVE); + + if (!GinPageIsIncompleteSplit(BufferGetPage(stack->buffer))) + { + /* + * Someone else already completed the split while we were not + * holding the lock. + */ + return; + } + } + + ginFinishSplit(btree, stack, false, buildStats); +} + +/* * Insert a value to tree described by stack. * * The value to be inserted is given in 'insertdata'. Its format depends @@ -779,7 +820,7 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, /* If the leaf page was incompletely split, finish the split first */ if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer))) - ginFinishSplit(btree, stack, false, buildStats); + ginFinishOldSplit(btree, stack, buildStats, GIN_EXCLUSIVE); done = ginPlaceToPage(btree, stack, insertdata, InvalidBlockNumber, |