summaryrefslogtreecommitdiff
path: root/src/backend/access/gin/ginget.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/gin/ginget.c')
-rw-r--r--src/backend/access/gin/ginget.c858
1 files changed, 501 insertions, 357 deletions
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index ed1e71c9d6f..aaef6efbb50 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -14,7 +14,7 @@
#include "postgres.h"
-#include "access/gin.h"
+#include "access/gin_private.h"
#include "access/relscan.h"
#include "catalog/index.h"
#include "miscadmin.h"
@@ -34,10 +34,43 @@ typedef struct pendingPosition
/*
- * Tries to refind previously taken ItemPointer on page.
+ * Convenience function for invoking a key's consistentFn
*/
static bool
-findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
+callConsistentFn(GinState *ginstate, GinScanKey key)
+{
+ /*
+ * If we're dealing with a dummy EVERYTHING key, we don't want to call
+ * the consistentFn; just claim it matches.
+ */
+ if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
+ {
+ key->recheckCurItem = false;
+ return true;
+ }
+
+ /*
+ * Initialize recheckCurItem in case the consistentFn doesn't know it
+ * should set it. The safe assumption in that case is to force recheck.
+ */
+ key->recheckCurItem = true;
+
+ return DatumGetBool(FunctionCall8(&ginstate->consistentFn[key->attnum - 1],
+ PointerGetDatum(key->entryRes),
+ UInt16GetDatum(key->strategy),
+ key->query,
+ UInt32GetDatum(key->nuserentries),
+ PointerGetDatum(key->extra_data),
+ PointerGetDatum(&key->recheckCurItem),
+ PointerGetDatum(key->queryValues),
+ PointerGetDatum(key->queryCategories)));
+}
+
+/*
+ * Tries to refind previously taken ItemPointer on a posting page.
+ */
+static bool
+findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off)
{
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
int res;
@@ -79,7 +112,9 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
return false; /* no more pages */
LockBuffer(stack->buffer, GIN_UNLOCK);
- stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
+ stack->buffer = ReleaseAndReadBuffer(stack->buffer,
+ btree->index,
+ stack->blkno);
LockBuffer(stack->buffer, GIN_SHARE);
stack->off = FirstOffsetNumber;
}
@@ -88,17 +123,19 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
}
/*
- * Does fullscan of posting tree and saves ItemPointers
- * in scanEntry->partialMatch TIDBitmap
+ * Scan all pages of a posting tree and save all its heap ItemPointers
+ * in scanEntry->matchBitmap
*/
static void
-scanForItems(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree)
+scanPostingTree(Relation index, GinScanEntry scanEntry,
+ BlockNumber rootPostingTree)
{
GinPostingTreeScan *gdi;
Buffer buffer;
Page page;
BlockNumber blkno;
+ /* Descend to the leftmost leaf page */
gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE);
buffer = ginScanBeginPostingTree(gdi);
@@ -108,51 +145,72 @@ scanForItems(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree
pfree(gdi);
/*
- * Goes through all leaves
+ * Loop iterates through all leaf pages of posting tree
*/
for (;;)
{
page = BufferGetPage(buffer);
- if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
+ if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 &&
+ GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
{
- tbm_add_tuples(scanEntry->partialMatch,
+ tbm_add_tuples(scanEntry->matchBitmap,
(ItemPointer) GinDataPageGetItem(page, FirstOffsetNumber),
GinPageGetOpaque(page)->maxoff, false);
scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
}
- blkno = GinPageGetOpaque(page)->rightlink;
if (GinPageRightMost(page))
- {
- UnlockReleaseBuffer(buffer);
- return; /* no more pages */
- }
+ break; /* no more pages */
+ blkno = GinPageGetOpaque(page)->rightlink;
LockBuffer(buffer, GIN_UNLOCK);
buffer = ReleaseAndReadBuffer(buffer, index, blkno);
LockBuffer(buffer, GIN_SHARE);
}
+
+ UnlockReleaseBuffer(buffer);
}
/*
- * Collects all ItemPointer into the TIDBitmap struct
- * for entries partially matched to search entry.
+ * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
+ * match the search entry. This supports three different match modes:
+ *
+ * 1. Partial-match support: scan from current point until the
+ * comparePartialFn says we're done.
+ * 2. SEARCH_MODE_ALL: scan from current point (which should be first
+ * key for the current attnum) until we hit null items or end of attnum
+ * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
+ * key for the current attnum) until we hit end of attnum
*
- * Returns true if done, false if it's needed to restart scan from scratch
+ * Returns true if done, false if it's necessary to restart scan from scratch
*/
static bool
-computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry)
+collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
+ GinScanEntry scanEntry)
{
- Page page;
- IndexTuple itup;
- Datum idatum;
- int32 cmp;
+ OffsetNumber attnum;
+ Form_pg_attribute attr;
- scanEntry->partialMatch = tbm_create(work_mem * 1024L);
+ /* Initialize empty bitmap result */
+ scanEntry->matchBitmap = tbm_create(work_mem * 1024L);
+
+ /* Null query cannot partial-match anything */
+ if (scanEntry->isPartialMatch &&
+ scanEntry->queryCategory != GIN_CAT_NORM_KEY)
+ return true;
+
+ /* Locate tupdesc entry for key column (for attbyval/attlen data) */
+ attnum = scanEntry->attnum;
+ attr = btree->ginstate->origTupdesc->attrs[attnum - 1];
for (;;)
{
+ Page page;
+ IndexTuple itup;
+ Datum idatum;
+ GinNullCategory icategory;
+
/*
* stack->off points to the interested entry, buffer is already locked
*/
@@ -165,56 +223,84 @@ computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry
/*
* If tuple stores another attribute then stop scan
*/
- if (gintuple_get_attrnum(btree->ginstate, itup) != scanEntry->attnum)
+ if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
return true;
- idatum = gin_index_getattr(btree->ginstate, itup);
+ /* Safe to fetch attribute value */
+ idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
-
- /*----------
- * Check of partial match.
- * case cmp == 0 => match
- * case cmp > 0 => not match and finish scan
- * case cmp < 0 => not match and continue scan
- *----------
+ /*
+ * Check for appropriate scan stop conditions
*/
- cmp = DatumGetInt32(FunctionCall4(&btree->ginstate->comparePartialFn[scanEntry->attnum - 1],
- scanEntry->entry,
- idatum,
- UInt16GetDatum(scanEntry->strategy),
- PointerGetDatum(scanEntry->extra_data)));
+ if (scanEntry->isPartialMatch)
+ {
+ int32 cmp;
- if (cmp > 0)
- return true;
- else if (cmp < 0)
+ /*
+ * In partial match, stop scan at any null (including
+ * placeholders); partial matches never match nulls
+ */
+ if (icategory != GIN_CAT_NORM_KEY)
+ return true;
+
+ /*----------
+ * Check of partial match.
+ * case cmp == 0 => match
+ * case cmp > 0 => not match and finish scan
+ * case cmp < 0 => not match and continue scan
+ *----------
+ */
+ cmp = DatumGetInt32(FunctionCall4(&btree->ginstate->comparePartialFn[attnum - 1],
+ scanEntry->queryKey,
+ idatum,
+ UInt16GetDatum(scanEntry->strategy),
+ PointerGetDatum(scanEntry->extra_data)));
+
+ if (cmp > 0)
+ return true;
+ else if (cmp < 0)
+ {
+ stack->off++;
+ continue;
+ }
+ }
+ else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
{
- stack->off++;
- continue;
+ /*
+ * In ALL mode, we are not interested in null items, so we can
+ * stop if we get to a null-item placeholder (which will be the
+ * last entry for a given attnum). We do want to include NULL_KEY
+ * and EMPTY_ITEM entries, though.
+ */
+ if (icategory == GIN_CAT_NULL_ITEM)
+ return true;
}
+ /*
+ * OK, we want to return the TIDs listed in this entry.
+ */
if (GinIsPostingTree(itup))
{
BlockNumber rootPostingTree = GinGetPostingTree(itup);
- Datum newDatum,
- savedDatum = datumCopy(
- idatum,
- btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attbyval,
- btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attlen
- );
/*
* We should unlock current page (but not unpin) during tree scan
* to prevent deadlock with vacuum processes.
*
- * We save current entry value (savedDatum) to be able to refind
+ * We save current entry value (idatum) to be able to re-find
* our tuple after re-locking
*/
+ if (icategory == GIN_CAT_NORM_KEY)
+ idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
+
LockBuffer(stack->buffer, GIN_UNLOCK);
- scanForItems(btree->index, scanEntry, rootPostingTree);
+
+ /* Collect all the TIDs in this entry's posting tree */
+ scanPostingTree(btree->index, scanEntry, rootPostingTree);
/*
* We lock again the entry page and while it was unlocked insert
- * might occured, so we need to refind our position
+ * might have occurred, so we need to re-find our position.
*/
LockBuffer(stack->buffer, GIN_SHARE);
page = BufferGetPage(stack->buffer);
@@ -222,45 +308,49 @@ computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry
{
/*
* Root page becomes non-leaf while we unlock it. We will
- * start again, this situation doesn't cause often - root can
- * became a non-leaf only one per life of index.
+ * start again, this situation doesn't occur often - root can
+ * became a non-leaf only once per life of index.
*/
-
return false;
}
+ /* Search forward to re-find idatum */
for (;;)
{
+ Datum newDatum;
+ GinNullCategory newCategory;
+
if (moveRightIfItNeeded(btree, stack) == false)
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
page = BufferGetPage(stack->buffer);
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
- newDatum = gin_index_getattr(btree->ginstate, itup);
- if (gintuple_get_attrnum(btree->ginstate, itup) != scanEntry->attnum)
+ if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
+ newDatum = gintuple_get_key(btree->ginstate, itup,
+ &newCategory);
- if (ginCompareEntries(btree->ginstate, scanEntry->attnum,
- newDatum, savedDatum) == 0)
- {
- /* Found! */
- if (btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attbyval == false)
- pfree(DatumGetPointer(savedDatum));
- break;
- }
+ if (ginCompareEntries(btree->ginstate, attnum,
+ newDatum, newCategory,
+ idatum, icategory) == 0)
+ break; /* Found! */
stack->off++;
}
+
+ if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
+ pfree(DatumGetPointer(idatum));
}
else
{
- tbm_add_tuples(scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false);
+ tbm_add_tuples(scanEntry->matchBitmap,
+ GinGetPosting(itup), GinGetNPosting(itup), false);
scanEntry->predictNumberResult += GinGetNPosting(itup);
}
/*
- * Ok, we save ItemPointers, go to the next entry
+ * Done with this entry, go to the next
*/
stack->off++;
}
@@ -272,19 +362,20 @@ computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry
* Start* functions setup beginning state of searches: finds correct buffer and pins it.
*/
static void
-startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
+startScanEntry(GinState *ginstate, GinScanEntry entry)
{
GinBtreeData btreeEntry;
GinBtreeStack *stackEntry;
Page page;
- bool needUnlock = TRUE;
+ bool needUnlock;
+restartScanEntry:
entry->buffer = InvalidBuffer;
entry->offset = InvalidOffsetNumber;
entry->list = NULL;
entry->nlist = 0;
- entry->partialMatch = NULL;
- entry->partialMatchResult = NULL;
+ entry->matchBitmap = NULL;
+ entry->matchResult = NULL;
entry->reduceResult = FALSE;
entry->predictNumberResult = 0;
@@ -298,46 +389,50 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
* we should find entry, and begin scan of posting tree or just store
* posting list in memory
*/
-
- ginPrepareEntryScan(&btreeEntry, index, entry->attnum, entry->entry, ginstate);
+ ginPrepareEntryScan(&btreeEntry, entry->attnum,
+ entry->queryKey, entry->queryCategory,
+ ginstate);
btreeEntry.searchMode = TRUE;
stackEntry = ginFindLeafPage(&btreeEntry, NULL);
page = BufferGetPage(stackEntry->buffer);
+ needUnlock = TRUE;
entry->isFinished = TRUE;
- if (entry->isPartialMatch)
+ if (entry->isPartialMatch ||
+ entry->queryCategory == GIN_CAT_EMPTY_QUERY)
{
/*
- * btreeEntry.findItem points to the first equal or greater value than
- * needed. So we will scan further and collect all ItemPointers
+ * btreeEntry.findItem locates the first item >= given search key.
+ * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
+ * because of the way the GIN_CAT_EMPTY_QUERY category code is
+ * assigned.) We scan forward from there and collect all TIDs needed
+ * for the entry type.
*/
btreeEntry.findItem(&btreeEntry, stackEntry);
- if (computePartialMatchList(&btreeEntry, stackEntry, entry) == false)
+ if (collectMatchBitmap(&btreeEntry, stackEntry, entry) == false)
{
/*
* GIN tree was seriously restructured, so we will cleanup all
* found data and rescan. See comments near 'return false' in
- * computePartialMatchList()
+ * collectMatchBitmap()
*/
- if (entry->partialMatch)
+ if (entry->matchBitmap)
{
- if (entry->partialMatchIterator)
- tbm_end_iterate(entry->partialMatchIterator);
- entry->partialMatchIterator = NULL;
- tbm_free(entry->partialMatch);
- entry->partialMatch = NULL;
+ if (entry->matchIterator)
+ tbm_end_iterate(entry->matchIterator);
+ entry->matchIterator = NULL;
+ tbm_free(entry->matchBitmap);
+ entry->matchBitmap = NULL;
}
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
freeGinBtreeStack(stackEntry);
-
- startScanEntry(index, ginstate, entry);
- return;
+ goto restartScanEntry;
}
- if (entry->partialMatch && !tbm_is_empty(entry->partialMatch))
+ if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
{
- entry->partialMatchIterator = tbm_begin_iterate(entry->partialMatch);
+ entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
entry->isFinished = FALSE;
}
}
@@ -352,7 +447,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
Page page;
/*
- * We should unlock entry page before make deal with posting tree
+ * We should unlock entry page before touching posting tree
* to prevent deadlocks with vacuum processes. Because entry is
* never deleted from page and posting tree is never reduced to
* the posting list, we can unlock page after getting BlockNumber
@@ -360,7 +455,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
*/
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
needUnlock = FALSE;
- gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE);
+ gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, TRUE);
entry->buffer = ginScanBeginPostingTree(gdi);
@@ -402,7 +497,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
}
static void
-startScanKey(Relation index, GinState *ginstate, GinScanKey key)
+startScanKey(GinState *ginstate, GinScanKey key)
{
uint32 i;
@@ -410,7 +505,7 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
return;
for (i = 0; i < key->nentries; i++)
- startScanEntry(index, ginstate, key->scanEntry + i);
+ startScanEntry(ginstate, key->scanEntry + i);
key->isFinished = FALSE;
key->firstCall = FALSE;
@@ -444,7 +539,7 @@ startScan(IndexScanDesc scan)
GinScanOpaque so = (GinScanOpaque) scan->opaque;
for (i = 0; i < so->nkeys; i++)
- startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
+ startScanKey(&so->ginstate, so->keys + i);
}
/*
@@ -453,7 +548,7 @@ startScan(IndexScanDesc scan)
* to prevent interference with vacuum
*/
static void
-entryGetNextItem(Relation index, GinScanEntry entry)
+entryGetNextItem(GinState *ginstate, GinScanEntry entry)
{
Page page;
BlockNumber blkno;
@@ -487,13 +582,15 @@ entryGetNextItem(Relation index, GinScanEntry entry)
return;
}
- entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
+ entry->buffer = ReleaseAndReadBuffer(entry->buffer,
+ ginstate->index,
+ blkno);
LockBuffer(entry->buffer, GIN_SHARE);
page = BufferGetPage(entry->buffer);
entry->offset = InvalidOffsetNumber;
if (!ItemPointerIsValid(&entry->curItem) ||
- findItemInPage(page, &entry->curItem, &entry->offset))
+ findItemInPostingPage(page, &entry->curItem, &entry->offset))
{
/*
* Found position equal to or greater than stored
@@ -512,7 +609,6 @@ entryGetNextItem(Relation index, GinScanEntry entry)
* First pages are deleted or empty, or we found exact
* position, so break inner loop and continue outer one.
*/
-
break;
}
@@ -527,25 +623,6 @@ entryGetNextItem(Relation index, GinScanEntry entry)
}
}
-/* convenience function for invoking a key's consistentFn */
-static inline bool
-callConsistentFn(GinState *ginstate, GinScanKey key)
-{
- /*
- * Initialize recheckCurItem in case the consistentFn doesn't know it
- * should set it. The safe assumption in that case is to force recheck.
- */
- key->recheckCurItem = true;
-
- return DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1],
- PointerGetDatum(key->entryRes),
- UInt16GetDatum(key->strategy),
- key->query,
- UInt32GetDatum(key->nentries),
- PointerGetDatum(key->extra_data),
- PointerGetDatum(&key->recheckCurItem)));
-}
-
#define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
@@ -563,7 +640,7 @@ callConsistentFn(GinState *ginstate, GinScanKey key)
* current implementation this is guaranteed by the behavior of tidbitmaps.
*/
static void
-entryGetItem(Relation index, GinScanEntry entry)
+entryGetItem(GinState *ginstate, GinScanEntry entry)
{
Assert(!entry->isFinished);
@@ -572,41 +649,40 @@ entryGetItem(Relation index, GinScanEntry entry)
entry->isFinished = entry->master->isFinished;
entry->curItem = entry->master->curItem;
}
- else if (entry->partialMatch)
+ else if (entry->matchBitmap)
{
do
{
- if (entry->partialMatchResult == NULL ||
- entry->offset >= entry->partialMatchResult->ntuples)
+ if (entry->matchResult == NULL ||
+ entry->offset >= entry->matchResult->ntuples)
{
- entry->partialMatchResult = tbm_iterate(entry->partialMatchIterator);
+ entry->matchResult = tbm_iterate(entry->matchIterator);
- if (entry->partialMatchResult == NULL)
+ if (entry->matchResult == NULL)
{
ItemPointerSetInvalid(&entry->curItem);
- tbm_end_iterate(entry->partialMatchIterator);
- entry->partialMatchIterator = NULL;
+ tbm_end_iterate(entry->matchIterator);
+ entry->matchIterator = NULL;
entry->isFinished = TRUE;
break;
}
/*
- * reset counter to the beginning of
- * entry->partialMatchResult. Note: entry->offset is still
- * greater than partialMatchResult->ntuples if
- * partialMatchResult is lossy. So, on next call we will get
- * next result from TIDBitmap.
+ * Reset counter to the beginning of entry->matchResult.
+ * Note: entry->offset is still greater than
+ * matchResult->ntuples if matchResult is lossy. So, on next
+ * call we will get next result from TIDBitmap.
*/
entry->offset = 0;
}
- if (entry->partialMatchResult->ntuples < 0)
+ if (entry->matchResult->ntuples < 0)
{
/*
* lossy result, so we need to check the whole page
*/
ItemPointerSetLossyPage(&entry->curItem,
- entry->partialMatchResult->blockno);
+ entry->matchResult->blockno);
/*
* We might as well fall out of the loop; we could not
@@ -617,8 +693,8 @@ entryGetItem(Relation index, GinScanEntry entry)
}
ItemPointerSet(&entry->curItem,
- entry->partialMatchResult->blockno,
- entry->partialMatchResult->offsets[entry->offset]);
+ entry->matchResult->blockno,
+ entry->matchResult->offsets[entry->offset]);
entry->offset++;
} while (entry->reduceResult == TRUE && dropItem(entry));
}
@@ -637,7 +713,7 @@ entryGetItem(Relation index, GinScanEntry entry)
{
do
{
- entryGetNextItem(index, entry);
+ entryGetNextItem(ginstate, entry);
} while (entry->isFinished == FALSE &&
entry->reduceResult == TRUE &&
dropItem(entry));
@@ -662,7 +738,7 @@ entryGetItem(Relation index, GinScanEntry entry)
* logic in scanGetItem.)
*/
static void
-keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
+keyGetItem(GinState *ginstate, MemoryContext tempCtx,
GinScanKey key, ItemPointer advancePast)
{
ItemPointerData myAdvancePast = *advancePast;
@@ -701,7 +777,7 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
while (entry->isFinished == FALSE &&
ginCompareItemPointers(&entry->curItem, &myAdvancePast) <= 0)
- entryGetItem(index, entry);
+ entryGetItem(ginstate, entry);
if (entry->isFinished == FALSE &&
ginCompareItemPointers(&entry->curItem, &key->curItem) < 0)
@@ -800,12 +876,12 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
* item pointers, possibly in combination with a lossy pointer. Our
* strategy if there's a lossy pointer is to try the consistentFn both
* ways and return a hit if it accepts either one (forcing the hit to
- * be marked lossy so it will be rechecked).
+ * be marked lossy so it will be rechecked). An exception is that
+ * we don't need to try it both ways if the lossy pointer is in a
+ * "hidden" entry, because the consistentFn's result can't depend on
+ * that.
*
* Prepare entryRes array to be passed to consistentFn.
- *
- * (If key->nentries == 1 then the consistentFn should always succeed,
- * but we must call it anyway to find out the recheck status.)
*/
for (i = 0; i < key->nentries; i++)
{
@@ -821,7 +897,7 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
res = callConsistentFn(ginstate, key);
- if (!res && haveLossyEntry)
+ if (!res && haveLossyEntry && lossyEntry < key->nuserentries)
{
/* try the other way for the lossy item */
key->entryRes[lossyEntry] = FALSE;
@@ -839,13 +915,137 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
} while (!res);
}
+/*
+ * Get next heap item pointer (after advancePast) from scan.
+ * Returns true if anything found.
+ * On success, *item and *recheck are set.
+ *
+ * Note: this is very nearly the same logic as in keyGetItem(), except
+ * that we know the keys are to be combined with AND logic, whereas in
+ * keyGetItem() the combination logic is known only to the consistentFn.
+ */
+static bool
+scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
+ ItemPointerData *item, bool *recheck)
+{
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ ItemPointerData myAdvancePast = *advancePast;
+ uint32 i;
+ bool match;
+
+ for (;;)
+ {
+ /*
+ * Advance any keys that are <= myAdvancePast. In particular,
+ * since key->curItem was initialized with ItemPointerSetMin, this
+ * ensures we fetch the first item for each key on the first call.
+ * Then set *item to the minimum of the key curItems.
+ *
+ * Note: a lossy-page entry is encoded by a ItemPointer with max value
+ * for offset (0xffff), so that it will sort after any exact entries
+ * for the same page. So we'll prefer to return exact pointers not
+ * lossy pointers, which is good. Also, when we advance past an exact
+ * entry after processing it, we will not advance past lossy entries
+ * for the same page in other keys, which is NECESSARY for correct
+ * results (since we might have additional entries for the same page
+ * in the first key).
+ */
+ ItemPointerSetMax(item);
+
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ while (key->isFinished == FALSE &&
+ ginCompareItemPointers(&key->curItem, &myAdvancePast) <= 0)
+ keyGetItem(&so->ginstate, so->tempCtx,
+ key, &myAdvancePast);
+
+ if (key->isFinished)
+ return FALSE; /* finished one of keys */
+
+ if (ginCompareItemPointers(&key->curItem, item) < 0)
+ *item = key->curItem;
+ }
+
+ Assert(!ItemPointerIsMax(item));
+
+ /*----------
+ * Now *item contains first ItemPointer after previous result.
+ *
+ * The item is a valid hit only if all the keys returned either
+ * that exact TID, or a lossy reference to the same page.
+ *
+ * This logic works only if a keyGetItem stream can never contain both
+ * exact and lossy pointers for the same page. Else we could have a
+ * case like
+ *
+ * stream 1 stream 2
+ * ... ...
+ * 42/6 42/7
+ * 50/1 42/0xffff
+ * ... ...
+ *
+ * We would conclude that 42/6 is not a match and advance stream 1,
+ * thus never detecting the match to the lossy pointer in stream 2.
+ * (keyGetItem has a similar problem versus entryGetItem.)
+ *----------
+ */
+ match = true;
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ if (ginCompareItemPointers(item, &key->curItem) == 0)
+ continue;
+ if (ItemPointerIsLossyPage(&key->curItem) &&
+ GinItemPointerGetBlockNumber(&key->curItem) ==
+ GinItemPointerGetBlockNumber(item))
+ continue;
+ match = false;
+ break;
+ }
+
+ if (match)
+ break;
+
+ /*
+ * No hit. Update myAdvancePast to this TID, so that on the next
+ * pass we'll move to the next possible entry.
+ */
+ myAdvancePast = *item;
+ }
+
+ /*
+ * We must return recheck = true if any of the keys are marked recheck.
+ */
+ *recheck = false;
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ if (key->recheckCurItem)
+ {
+ *recheck = true;
+ break;
+ }
+ }
+
+ return TRUE;
+}
+
+
+/*
+ * Functions for scanning the pending list
+ */
+
/*
* Get ItemPointer of next heap row to be checked from pending list.
- * Returns false if there are no more. On pages with several rows
+ * Returns false if there are no more. On pages with several heap rows
* it returns each row separately, on page with part of heap row returns
- * per page data. pos->firstOffset and pos->lastOffset points
- * fraction of tuples for current heap row.
+ * per page data. pos->firstOffset and pos->lastOffset are set to identify
+ * the range of pending-list tuples belonging to this heap row.
*
* The pendingBuffer is presumed pinned and share-locked on entry, and is
* pinned and share-locked on success exit. On failure exit it's released.
@@ -917,10 +1117,9 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
/*
* Now pos->firstOffset points to the first tuple of current heap
- * row, pos->lastOffset points to the first tuple of second heap
+ * row, pos->lastOffset points to the first tuple of next heap
* row (or to the end of page)
*/
-
break;
}
}
@@ -929,35 +1128,47 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
}
/*
- * Scan page from current tuple (off) up till the first of:
+ * Scan pending-list page from current tuple (off) up till the first of:
* - match is found (then returns true)
* - no later match is possible
* - tuple's attribute number is not equal to entry's attrnum
* - reach end of page
+ *
+ * datum[]/category[]/datumExtracted[] arrays are used to cache the results
+ * of gintuple_get_key() on the current page.
*/
static bool
matchPartialInPendingList(GinState *ginstate, Page page,
OffsetNumber off, OffsetNumber maxoff,
- Datum value, OffsetNumber attrnum,
- Datum *datum, bool *datumExtracted,
- StrategyNumber strategy,
- Pointer extra_data)
+ GinScanEntry entry,
+ Datum *datum, GinNullCategory *category,
+ bool *datumExtracted)
{
IndexTuple itup;
int32 cmp;
+ /* Partial match to a null is not possible */
+ if (entry->queryCategory != GIN_CAT_NORM_KEY)
+ return false;
+
while (off < maxoff)
{
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
- if (attrnum != gintuple_get_attrnum(ginstate, itup))
+
+ if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
return false;
if (datumExtracted[off - 1] == false)
{
- datum[off - 1] = gin_index_getattr(ginstate, itup);
+ datum[off - 1] = gintuple_get_key(ginstate, itup,
+ &category[off - 1]);
datumExtracted[off - 1] = true;
}
+ /* Once we hit nulls, no further match is possible */
+ if (category[off - 1] != GIN_CAT_NORM_KEY)
+ return false;
+
/*----------
* Check partial match.
* case cmp == 0 => match
@@ -965,11 +1176,11 @@ matchPartialInPendingList(GinState *ginstate, Page page,
* case cmp < 0 => not match and continue scan
*----------
*/
- cmp = DatumGetInt32(FunctionCall4(&ginstate->comparePartialFn[attrnum - 1],
- value,
+ cmp = DatumGetInt32(FunctionCall4(&ginstate->comparePartialFn[entry->attnum - 1],
+ entry->queryKey,
datum[off - 1],
- UInt16GetDatum(strategy),
- PointerGetDatum(extra_data)));
+ UInt16GetDatum(entry->strategy),
+ PointerGetDatum(entry->extra_data)));
if (cmp == 0)
return true;
else if (cmp > 0)
@@ -981,27 +1192,20 @@ matchPartialInPendingList(GinState *ginstate, Page page,
return false;
}
-static bool
-hasAllMatchingKeys(GinScanOpaque so, pendingPosition *pos)
-{
- int i;
-
- for (i = 0; i < so->nkeys; i++)
- if (pos->hasMatchKey[i] == false)
- return false;
-
- return true;
-}
-
/*
- * Sets entryRes array for each key by looking at
- * every entry per indexed value (heap's row) in pending list.
- * returns true if at least one of datum was matched by key's entry
+ * Set up the entryRes array for each key by looking at
+ * every entry for current heap row in pending list.
+ *
+ * Returns true if each scan key has at least one entryRes match.
+ * This corresponds to the situations where the normal index search will
+ * try to apply the key's consistentFn. (A tuple not meeting that requirement
+ * cannot be returned by the normal search since no entry stream will
+ * source its TID.)
*
* The pendingBuffer is presumed pinned and share-locked on entry.
*/
static bool
-collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
+collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
{
GinScanOpaque so = (GinScanOpaque) scan->opaque;
OffsetNumber attrnum;
@@ -1011,7 +1215,7 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
j;
/*
- * Reset entryRes
+ * Reset all entryRes and hasMatchKey flags
*/
for (i = 0; i < so->nkeys; i++)
{
@@ -1021,13 +1225,19 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
}
memset(pos->hasMatchKey, FALSE, so->nkeys);
+ /*
+ * Outer loop iterates over multiple pending-list pages when a single
+ * heap row has entries spanning those pages.
+ */
for (;;)
{
Datum datum[BLCKSZ / sizeof(IndexTupleData)];
+ GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
Assert(pos->lastOffset > pos->firstOffset);
- memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset));
+ memset(datumExtracted + pos->firstOffset - 1, 0,
+ sizeof(bool) * (pos->lastOffset - pos->firstOffset));
page = BufferGetPage(pos->pendingBuffer);
@@ -1037,128 +1247,175 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
for (j = 0; j < key->nentries; j++)
{
+ GinScanEntry entry = key->scanEntry + j;
OffsetNumber StopLow = pos->firstOffset,
StopHigh = pos->lastOffset,
StopMiddle;
- GinScanEntry entry = key->scanEntry + j;
- /* already true - do not extra work */
+ /* If already matched on earlier page, do no extra work */
if (key->entryRes[j])
continue;
/*
- * Interested tuples are from pos->firstOffset to
+ * Interesting tuples are from pos->firstOffset to
* pos->lastOffset and they are ordered by (attnum, Datum) as
- * it's done in entry tree So we could use binary search to
- * prevent linear scanning
+ * it's done in entry tree. So we can use binary search to
+ * avoid linear scanning.
*/
while (StopLow < StopHigh)
{
+ int res;
+
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
+
attrnum = gintuple_get_attrnum(&so->ginstate, itup);
if (key->attnum < attrnum)
+ {
StopHigh = StopMiddle;
- else if (key->attnum > attrnum)
+ continue;
+ }
+ if (key->attnum > attrnum)
+ {
StopLow = StopMiddle + 1;
- else
+ continue;
+ }
+
+ if (datumExtracted[StopMiddle - 1] == false)
{
- int res;
+ datum[StopMiddle - 1] =
+ gintuple_get_key(&so->ginstate, itup,
+ &category[StopMiddle - 1]);
+ datumExtracted[StopMiddle - 1] = true;
+ }
- if (datumExtracted[StopMiddle - 1] == false)
+ if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
+ {
+ /* special behavior depending on searchMode */
+ if (entry->searchMode == GIN_SEARCH_MODE_ALL)
{
- datum[StopMiddle - 1] = gin_index_getattr(&so->ginstate, itup);
- datumExtracted[StopMiddle - 1] = true;
+ /* match anything except NULL_ITEM */
+ if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
+ res = -1;
+ else
+ res = 0;
+ }
+ else
+ {
+ /* match everything */
+ res = 0;
}
+ }
+ else
+ {
res = ginCompareEntries(&so->ginstate,
entry->attnum,
- entry->entry,
- datum[StopMiddle - 1]);
+ entry->queryKey,
+ entry->queryCategory,
+ datum[StopMiddle - 1],
+ category[StopMiddle - 1]);
+ }
- if (res == 0)
- {
- /*
- * The exact match causes, so we just scan from
- * current position to find a partial match. See
- * comment above about tuple's ordering.
- */
- if (entry->isPartialMatch)
- key->entryRes[j] =
- matchPartialInPendingList(&so->ginstate,
- page, StopMiddle,
- pos->lastOffset,
- entry->entry,
- entry->attnum,
- datum,
- datumExtracted,
- entry->strategy,
- entry->extra_data);
- else
- key->entryRes[j] = true;
- break;
- }
- else if (res < 0)
- StopHigh = StopMiddle;
+ if (res == 0)
+ {
+ /*
+ * Found exact match (there can be only one, except
+ * in EMPTY_QUERY mode).
+ *
+ * If doing partial match, scan forward from
+ * here to end of page to check for matches.
+ *
+ * See comment above about tuple's ordering.
+ */
+ if (entry->isPartialMatch)
+ key->entryRes[j] =
+ matchPartialInPendingList(&so->ginstate,
+ page,
+ StopMiddle,
+ pos->lastOffset,
+ entry,
+ datum,
+ category,
+ datumExtracted);
else
- StopLow = StopMiddle + 1;
+ key->entryRes[j] = true;
+
+ /* done with binary search */
+ break;
}
+ else if (res < 0)
+ StopHigh = StopMiddle;
+ else
+ StopLow = StopMiddle + 1;
}
if (StopLow >= StopHigh && entry->isPartialMatch)
{
/*
- * The exact match wasn't found, so we need to start scan
- * from first tuple greater then current entry See comment
- * above about tuple's ordering.
+ * No exact match on this page. If doing partial
+ * match, scan from the first tuple greater than
+ * target value to end of page. Note that since we
+ * don't remember whether the comparePartialFn told us
+ * to stop early on a previous page, we will uselessly
+ * apply comparePartialFn to the first tuple on each
+ * subsequent page.
*/
key->entryRes[j] =
matchPartialInPendingList(&so->ginstate,
- page, StopHigh,
+ page,
+ StopHigh,
pos->lastOffset,
- entry->entry,
- entry->attnum,
+ entry,
datum,
- datumExtracted,
- entry->strategy,
- entry->extra_data);
+ category,
+ datumExtracted);
}
pos->hasMatchKey[i] |= key->entryRes[j];
}
}
+ /* Advance firstOffset over the scanned tuples */
pos->firstOffset = pos->lastOffset;
if (GinPageHasFullRow(page))
{
/*
- * We scan all values from one tuple, go to next one
+ * We have examined all pending entries for the current heap row.
+ * Break out of loop over pages.
*/
-
- return hasAllMatchingKeys(so, pos);
+ break;
}
else
{
- ItemPointerData item = pos->item;
-
/*
- * need to get next portion of tuples of row containing on several
- * pages
+ * Advance to next page of pending entries for the current heap
+ * row. Complain if there isn't one.
*/
+ ItemPointerData item = pos->item;
- if (scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item))
- elog(ERROR, "Could not process tuple"); /* XXX should not be
- * here ! */
+ if (scanGetCandidate(scan, pos) == false ||
+ !ItemPointerEquals(&pos->item, &item))
+ elog(ERROR, "could not find additional pending pages for same heap tuple");
}
}
- return hasAllMatchingKeys(so, pos);
+ /*
+ * Now return "true" if all scan keys have at least one matching datum
+ */
+ for (i = 0; i < so->nkeys; i++)
+ {
+ if (pos->hasMatchKey[i] == false)
+ return false;
+ }
+
+ return true;
}
/*
- * Collect all matched rows from pending list in bitmap
+ * Collect all matched rows from pending list into bitmap
*/
static void
scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
@@ -1201,11 +1458,13 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
while (scanGetCandidate(scan, &pos))
{
/*
- * Check entries in tuple and setup entryRes array If tuples of heap's
- * row are placed on several pages collectDatumForItem will read all
- * of that pages.
+ * Check entries in tuple and set up entryRes array.
+ *
+ * If pending tuples belonging to the current heap row are spread
+ * across several pages, collectMatchesForHeapRow will read all of
+ * those pages.
*/
- if (!collectDatumForItem(scan, &pos))
+ if (!collectMatchesForHeapRow(scan, &pos))
continue;
/*
@@ -1241,124 +1500,6 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
pfree(pos.hasMatchKey);
}
-/*
- * Get next heap item pointer (after advancePast) from scan.
- * Returns true if anything found.
- * On success, *item and *recheck are set.
- *
- * Note: this is very nearly the same logic as in keyGetItem(), except
- * that we know the keys are to be combined with AND logic, whereas in
- * keyGetItem() the combination logic is known only to the consistentFn.
- */
-static bool
-scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
- ItemPointerData *item, bool *recheck)
-{
- GinScanOpaque so = (GinScanOpaque) scan->opaque;
- ItemPointerData myAdvancePast = *advancePast;
- uint32 i;
- bool match;
-
- for (;;)
- {
- /*
- * Advance any keys that are <= myAdvancePast. In particular,
- * since key->curItem was initialized with ItemPointerSetMin, this
- * ensures we fetch the first item for each key on the first call.
- * Then set *item to the minimum of the key curItems.
- *
- * Note: a lossy-page entry is encoded by a ItemPointer with max value
- * for offset (0xffff), so that it will sort after any exact entries
- * for the same page. So we'll prefer to return exact pointers not
- * lossy pointers, which is good. Also, when we advance past an exact
- * entry after processing it, we will not advance past lossy entries
- * for the same page in other keys, which is NECESSARY for correct
- * results (since we might have additional entries for the same page
- * in the first key).
- */
- ItemPointerSetMax(item);
-
- for (i = 0; i < so->nkeys; i++)
- {
- GinScanKey key = so->keys + i;
-
- while (key->isFinished == FALSE &&
- ginCompareItemPointers(&key->curItem, &myAdvancePast) <= 0)
- keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx,
- key, &myAdvancePast);
-
- if (key->isFinished)
- return FALSE; /* finished one of keys */
-
- if (ginCompareItemPointers(&key->curItem, item) < 0)
- *item = key->curItem;
- }
-
- Assert(!ItemPointerIsMax(item));
-
- /*----------
- * Now *item contains first ItemPointer after previous result.
- *
- * The item is a valid hit only if all the keys returned either
- * that exact TID, or a lossy reference to the same page.
- *
- * This logic works only if a keyGetItem stream can never contain both
- * exact and lossy pointers for the same page. Else we could have a
- * case like
- *
- * stream 1 stream 2
- * ... ...
- * 42/6 42/7
- * 50/1 42/0xffff
- * ... ...
- *
- * We would conclude that 42/6 is not a match and advance stream 1,
- * thus never detecting the match to the lossy pointer in stream 2.
- * (keyGetItem has a similar problem versus entryGetItem.)
- *----------
- */
- match = true;
- for (i = 0; i < so->nkeys; i++)
- {
- GinScanKey key = so->keys + i;
-
- if (ginCompareItemPointers(item, &key->curItem) == 0)
- continue;
- if (ItemPointerIsLossyPage(&key->curItem) &&
- GinItemPointerGetBlockNumber(&key->curItem) ==
- GinItemPointerGetBlockNumber(item))
- continue;
- match = false;
- break;
- }
-
- if (match)
- break;
-
- /*
- * No hit. Update myAdvancePast to this TID, so that on the next
- * pass we'll move to the next possible entry.
- */
- myAdvancePast = *item;
- }
-
- /*
- * We must return recheck = true if any of the keys are marked recheck.
- */
- *recheck = false;
- for (i = 0; i < so->nkeys; i++)
- {
- GinScanKey key = so->keys + i;
-
- if (key->recheckCurItem)
- {
- *recheck = true;
- break;
- }
- }
-
- return TRUE;
-}
#define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL )
#define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
@@ -1372,6 +1513,9 @@ gingetbitmap(PG_FUNCTION_ARGS)
ItemPointerData iptr;
bool recheck;
+ /*
+ * Set up the scan keys, and check for unsatisfiable query.
+ */
if (GinIsNewKey(scan))
ginNewScanKey(scan);