summaryrefslogtreecommitdiff
path: root/src/backend/access/gin/ginentrypage.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/gin/ginentrypage.c')
-rw-r--r--src/backend/access/gin/ginentrypage.c329
1 files changed, 189 insertions, 140 deletions
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index e56034202ad..9749a1be786 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -14,7 +14,7 @@
#include "postgres.h"
-#include "access/gin.h"
+#include "access/gin_private.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
@@ -24,107 +24,116 @@
* If the tuple would be too big to be stored, function throws a suitable
* error if errorTooBig is TRUE, or returns NULL if errorTooBig is FALSE.
*
- * On leaf pages, Index tuple has non-traditional layout. Tuple may contain
- * posting list or root blocknumber of posting tree.
- * Macros: GinIsPostingTree(itup) / GinSetPostingTree(itup, blkno)
- * 1) Posting list
- * - itup->t_info & INDEX_SIZE_MASK contains total size of tuple as usual
- * - ItemPointerGetBlockNumber(&itup->t_tid) contains original
- * size of tuple (without posting list).
- * Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
- * - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
- * of elements in posting list (number of heap itempointers)
- * Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n)
- * - After standard part of tuple there is a posting list, ie, array
- * of heap itempointers
- * Macros: GinGetPosting(itup)
- * 2) Posting tree
- * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
- * - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
- * root of posting tree
- * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number
- * GIN_TREE_POSTING, which distinguishes this from posting-list case
- *
- * Attributes of an index tuple are different for single and multicolumn index.
- * For single-column case, index tuple stores only value to be indexed.
- * For multicolumn case, it stores two attributes: column number of value
- * and value.
+ * See src/backend/access/gin/README for a description of the index tuple
+ * format that is being built here. We build on the assumption that we
+ * are making a leaf-level key entry containing a posting list of nipd items.
+ * If the caller is actually trying to make a posting-tree entry, non-leaf
+ * entry, or pending-list entry, it should pass nipd = 0 and then overwrite
+ * the t_tid fields as necessary. In any case, ipd can be NULL to skip
+ * copying any itempointers into the posting list; the caller is responsible
+ * for filling the posting list afterwards, if ipd = NULL and nipd > 0.
*/
IndexTuple
-GinFormTuple(Relation index, GinState *ginstate,
- OffsetNumber attnum, Datum key,
- ItemPointerData *ipd, uint32 nipd, bool errorTooBig)
+GinFormTuple(GinState *ginstate,
+ OffsetNumber attnum, Datum key, GinNullCategory category,
+ ItemPointerData *ipd, uint32 nipd,
+ bool errorTooBig)
{
- bool isnull[2] = {FALSE, FALSE};
+ Datum datums[2];
+ bool isnull[2];
IndexTuple itup;
uint32 newsize;
+ /* Build the basic tuple: optional column number, plus key datum */
if (ginstate->oneCol)
- itup = index_form_tuple(ginstate->origTupdesc, &key, isnull);
+ {
+ datums[0] = key;
+ isnull[0] = (category != GIN_CAT_NORM_KEY);
+ }
else
{
- Datum datums[2];
-
datums[0] = UInt16GetDatum(attnum);
+ isnull[0] = false;
datums[1] = key;
- itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull);
+ isnull[1] = (category != GIN_CAT_NORM_KEY);
}
- GinSetOrigSizePosting(itup, IndexTupleSize(itup));
+ itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull);
+
+ /*
+ * Determine and store offset to the posting list, making sure there is
+ * room for the category byte if needed.
+ *
+ * Note: because index_form_tuple MAXALIGNs the tuple size, there may well
+ * be some wasted pad space. Is it worth recomputing the data length to
+ * prevent that? That would also allow us to Assert that the real data
+ * doesn't overlap the GinNullCategory byte, which this code currently
+ * takes on faith.
+ */
+ newsize = IndexTupleSize(itup);
- if (nipd > 0)
+ if (IndexTupleHasNulls(itup))
{
- newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd);
- if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize))
- {
- if (errorTooBig)
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
- (unsigned long) newsize,
- (unsigned long) Min(INDEX_SIZE_MASK,
- GinMaxItemSize),
- RelationGetRelationName(index))));
- return NULL;
- }
+ uint32 minsize;
+
+ Assert(category != GIN_CAT_NORM_KEY);
+ minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory);
+ newsize = Max(newsize, minsize);
+ }
+ newsize = SHORTALIGN(newsize);
+
+ GinSetPostingOffset(itup, newsize);
+
+ GinSetNPosting(itup, nipd);
+
+ /*
+ * Add space needed for posting list, if any. Then check that the tuple
+ * won't be too big to store.
+ */
+ newsize += sizeof(ItemPointerData) * nipd;
+ newsize = MAXALIGN(newsize);
+ if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize))
+ {
+ if (errorTooBig)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
+ (unsigned long) newsize,
+ (unsigned long) Min(INDEX_SIZE_MASK,
+ GinMaxItemSize),
+ RelationGetRelationName(ginstate->index))));
+ pfree(itup);
+ return NULL;
+ }
+
+ /*
+ * Resize tuple if needed
+ */
+ if (newsize != IndexTupleSize(itup))
+ {
itup = repalloc(itup, newsize);
- /* set new size */
+ /* set new size in tuple header */
itup->t_info &= ~INDEX_SIZE_MASK;
itup->t_info |= newsize;
-
- if (ipd)
- memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
- GinSetNPosting(itup, nipd);
}
- else
- {
- /*
- * Gin tuple without any ItemPointers should be large enough to keep
- * one ItemPointer, to prevent inconsistency between
- * ginHeapTupleFastCollect and ginEntryInsert called by
- * ginHeapTupleInsert. ginHeapTupleFastCollect forms tuple without
- * extra pointer to heap, but ginEntryInsert (called for pending list
- * cleanup during vacuum) will form the same tuple with one
- * ItemPointer.
- */
- newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData));
- if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize))
- {
- if (errorTooBig)
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
- (unsigned long) newsize,
- (unsigned long) Min(INDEX_SIZE_MASK,
- GinMaxItemSize),
- RelationGetRelationName(index))));
- return NULL;
- }
- GinSetNPosting(itup, 0);
+ /*
+ * Insert category byte, if needed
+ */
+ if (category != GIN_CAT_NORM_KEY)
+ {
+ Assert(IndexTupleHasNulls(itup));
+ GinSetNullCategory(itup, ginstate, category);
}
+
+ /*
+ * Copy in the posting list, if provided
+ */
+ if (ipd)
+ memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
+
return itup;
}
@@ -140,7 +149,8 @@ GinShortenTuple(IndexTuple itup, uint32 nipd)
Assert(nipd <= GinGetNPosting(itup));
- newsize = MAXALIGN(SHORTALIGN(GinGetOrigSizePosting(itup)) + sizeof(ItemPointerData) * nipd);
+ newsize = GinGetPostingOffset(itup) + sizeof(ItemPointerData) * nipd;
+ newsize = MAXALIGN(newsize);
Assert(newsize <= (itup->t_info & INDEX_SIZE_MASK));
@@ -151,8 +161,45 @@ GinShortenTuple(IndexTuple itup, uint32 nipd)
}
/*
+ * Form a non-leaf entry tuple by copying the key data from the given tuple,
+ * which can be either a leaf or non-leaf entry tuple.
+ *
+ * Any posting list in the source tuple is not copied. The specified child
+ * block number is inserted into t_tid.
+ */
+static IndexTuple
+GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk)
+{
+ IndexTuple nitup;
+
+ if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
+ {
+ /* Tuple contains a posting list, just copy stuff before that */
+ uint32 origsize = GinGetPostingOffset(itup);
+
+ origsize = MAXALIGN(origsize);
+ nitup = (IndexTuple) palloc(origsize);
+ memcpy(nitup, itup, origsize);
+ /* ... be sure to fix the size header field ... */
+ nitup->t_info &= ~INDEX_SIZE_MASK;
+ nitup->t_info |= origsize;
+ }
+ else
+ {
+ /* Copy the tuple as-is */
+ nitup = (IndexTuple) palloc(IndexTupleSize(itup));
+ memcpy(nitup, itup, IndexTupleSize(itup));
+ }
+
+ /* Now insert the correct downlink */
+ GinSetDownlink(nitup, childblk);
+
+ return nitup;
+}
+
+/*
* Entry tree is a "static", ie tuple never deletes from it,
- * so we don't use right bound, we use rightest key instead.
+ * so we don't use right bound, we use rightmost key instead.
*/
static IndexTuple
getRightMostTuple(Page page)
@@ -166,16 +213,20 @@ static bool
entryIsMoveRight(GinBtree btree, Page page)
{
IndexTuple itup;
+ OffsetNumber attnum;
+ Datum key;
+ GinNullCategory category;
if (GinPageRightMost(page))
return FALSE;
itup = getRightMostTuple(page);
+ attnum = gintuple_get_attrnum(btree->ginstate, itup);
+ key = gintuple_get_key(btree->ginstate, itup, &category);
if (ginCompareAttEntries(btree->ginstate,
- btree->entryAttnum, btree->entryValue,
- gintuple_get_attrnum(btree->ginstate, itup),
- gin_index_getattr(btree->ginstate, itup)) > 0)
+ btree->entryAttnum, btree->entryKey, btree->entryCategory,
+ attnum, key, category) > 0)
return TRUE;
return FALSE;
@@ -183,7 +234,7 @@ entryIsMoveRight(GinBtree btree, Page page)
/*
* Find correct tuple in non-leaf page. It supposed that
- * page correctly choosen and searching value SHOULD be on page
+ * page correctly chosen and searching value SHOULD be on page
*/
static BlockNumber
entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
@@ -216,23 +267,31 @@ entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
OffsetNumber mid = low + ((high - low) / 2);
if (mid == maxoff && GinPageRightMost(page))
+ {
/* Right infinity */
result = -1;
+ }
else
{
+ OffsetNumber attnum;
+ Datum key;
+ GinNullCategory category;
+
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
+ attnum = gintuple_get_attrnum(btree->ginstate, itup);
+ key = gintuple_get_key(btree->ginstate, itup, &category);
result = ginCompareAttEntries(btree->ginstate,
btree->entryAttnum,
- btree->entryValue,
- gintuple_get_attrnum(btree->ginstate, itup),
- gin_index_getattr(btree->ginstate, itup));
+ btree->entryKey,
+ btree->entryCategory,
+ attnum, key, category);
}
if (result == 0)
{
stack->off = mid;
- Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
- return GinItemPointerGetBlockNumber(&(itup)->t_tid);
+ Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO);
+ return GinGetDownlink(itup);
}
else if (result > 0)
low = mid + 1;
@@ -244,13 +303,13 @@ entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
stack->off = high;
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
- Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
- return GinItemPointerGetBlockNumber(&(itup)->t_tid);
+ Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO);
+ return GinGetDownlink(itup);
}
/*
* Searches correct position for value on leaf page.
- * Page should be corrrectly choosen.
+ * Page should be correctly chosen.
* Returns true if value found on page.
*/
static bool
@@ -259,7 +318,6 @@ entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
Page page = BufferGetPage(stack->buffer);
OffsetNumber low,
high;
- IndexTuple itup;
Assert(GinPageIsLeaf(page));
Assert(!GinPageIsData(page));
@@ -284,14 +342,20 @@ entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
while (high > low)
{
OffsetNumber mid = low + ((high - low) / 2);
+ IndexTuple itup;
+ OffsetNumber attnum;
+ Datum key;
+ GinNullCategory category;
int result;
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
+ attnum = gintuple_get_attrnum(btree->ginstate, itup);
+ key = gintuple_get_key(btree->ginstate, itup, &category);
result = ginCompareAttEntries(btree->ginstate,
btree->entryAttnum,
- btree->entryValue,
- gintuple_get_attrnum(btree->ginstate, itup),
- gin_index_getattr(btree->ginstate, itup));
+ btree->entryKey,
+ btree->entryCategory,
+ attnum, key, category);
if (result == 0)
{
stack->off = mid;
@@ -321,7 +385,7 @@ entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber sto
if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
{
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
- if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
+ if (GinGetDownlink(itup) == blkno)
return storedOff;
/*
@@ -331,7 +395,7 @@ entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber sto
for (i = storedOff + 1; i <= maxoff; i++)
{
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
- if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
+ if (GinGetDownlink(itup) == blkno)
return i;
}
maxoff = storedOff - 1;
@@ -341,7 +405,7 @@ entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber sto
for (i = FirstOffsetNumber; i <= maxoff; i++)
{
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
- if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
+ if (GinGetDownlink(itup) == blkno)
return i;
}
@@ -358,7 +422,7 @@ entryGetLeftMostPage(GinBtree btree, Page page)
Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
- return GinItemPointerGetBlockNumber(&(itup)->t_tid);
+ return GinGetDownlink(itup);
}
static bool
@@ -406,7 +470,7 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
- ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber);
+ GinSetDownlink(itup, btree->rightblkno);
ret = btree->rightblkno;
}
@@ -422,10 +486,11 @@ static void
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
{
Page page = BufferGetPage(buf);
- static XLogRecData rdata[3];
OffsetNumber placed;
- static ginxlogInsert data;
int cnt = 0;
+ /* these must be static so they can be returned to caller */
+ static XLogRecData rdata[3];
+ static ginxlogInsert data;
*prdata = rdata;
data.updateBlkno = entryPreparePage(btree, page, off);
@@ -475,31 +540,6 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd
}
/*
- * Returns new tuple with copied value from source tuple.
- * New tuple will not store posting list
- */
-static IndexTuple
-copyIndexTuple(IndexTuple itup, Page page)
-{
- IndexTuple nitup;
-
- if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
- {
- nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup)));
- memcpy(nitup, itup, GinGetOrigSizePosting(itup));
- nitup->t_info &= ~INDEX_SIZE_MASK;
- nitup->t_info |= GinGetOrigSizePosting(itup);
- }
- else
- {
- nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup)));
- memcpy(nitup, itup, IndexTupleSize(itup));
- }
-
- return nitup;
-}
-
-/*
* Place tuple and split page, original buffer(lbuf) leaves untouched,
* returns shadow page of lbuf filled new data.
* Tuples are distributed between pages by equal size on its, not
@@ -508,26 +548,27 @@ copyIndexTuple(IndexTuple itup, Page page)
static Page
entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
{
- static XLogRecData rdata[2];
OffsetNumber i,
maxoff,
separator = InvalidOffsetNumber;
Size totalsize = 0;
Size lsize = 0,
size;
- static char tupstore[2 * BLCKSZ];
char *ptr;
IndexTuple itup,
leftrightmost = NULL;
- static ginxlogSplit data;
Page page;
Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
Page rpage = BufferGetPage(rbuf);
Size pageSize = PageGetPageSize(lpage);
+ /* these must be static so they can be returned to caller */
+ static XLogRecData rdata[2];
+ static ginxlogSplit data;
+ static char tupstore[2 * BLCKSZ];
*prdata = rdata;
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
- InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid));
+ InvalidOffsetNumber : GinGetDownlink(btree->entry);
data.updateBlkno = entryPreparePage(btree, lpage, off);
maxoff = PageGetMaxOffsetNumber(lpage);
@@ -588,8 +629,8 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
ptr += MAXALIGN(IndexTupleSize(itup));
}
- btree->entry = copyIndexTuple(leftrightmost, lpage);
- ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber);
+ btree->entry = GinFormInteriorTuple(leftrightmost, lpage,
+ BufferGetBlockNumber(lbuf));
btree->rightblkno = BufferGetBlockNumber(rbuf);
@@ -627,8 +668,7 @@ ginPageGetLinkItup(Buffer buf)
Page page = BufferGetPage(buf);
itup = getRightMostTuple(page);
- nitup = copyIndexTuple(itup, page);
- ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber);
+ nitup = GinFormInteriorTuple(itup, page, BufferGetBlockNumber(buf));
return nitup;
}
@@ -656,12 +696,20 @@ ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
pfree(itup);
}
+/*
+ * Set up GinBtree for entry page access
+ *
+ * Note: during WAL recovery, there may be no valid data in ginstate
+ * other than a faked-up Relation pointer; the key datum is bogus too.
+ */
void
-ginPrepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum value, GinState *ginstate)
+ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
+ Datum key, GinNullCategory category,
+ GinState *ginstate)
{
memset(btree, 0, sizeof(GinBtreeData));
- btree->index = index;
+ btree->index = ginstate->index;
btree->ginstate = ginstate;
btree->findChildPage = entryLocateEntry;
@@ -680,6 +728,7 @@ ginPrepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum v
btree->isBuild = FALSE;
btree->entryAttnum = attnum;
- btree->entryValue = value;
+ btree->entryKey = key;
+ btree->entryCategory = category;
btree->isDelete = FALSE;
}