summary | refs | log | tree | commit | diff
path: root/src/backend/utils/adt/tsginidx.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2020-04-27 12:21:04 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2020-04-27 12:21:04 -0400
commit: 22dcb6c206ac72d0ea722d22e89e5755beed7b76 (patch)
tree: 7c38b6d5f3f79b90ef1d36cde8a8df16d116a9ae /src/backend/utils/adt/tsginidx.c
parent: 86b7a4e6fd7db56022bf49b1c7eb482fdfddf8e5 (diff)
Fix full text search to handle NOT above a phrase search correctly.

Queries such as '!(foo<->bar)' failed to find matching rows when implemented as a GiST or GIN index search. That's because of failing to handle phrase searches as tri-valued when considering a query without any position information for the target tsvector. We can only say that the phrase operator might match, not that it does match; and therefore its NOT also might match. The previous coding incorrectly inverted the approximate phrase result to decide that there was certainly no match.

To fix, we need to make TS_phrase_execute return a real ternary result, and then bubble that up accurately in TS_execute. As long as we have to do that anyway, we can simplify the baroque things TS_phrase_execute was doing internally to manage tri-valued searching with only a bool as explicit result.

For now, I left the externally-visible result of TS_execute as a plain bool. There do not appear to be any outside callers that need to distinguish a three-way result, given that they passed in a flag saying what to do in the absence of position data. This might need to change someday, but we wouldn't want to back-patch such a change.

Although tsginidx.c has its own TS_execute_ternary implementation for use at upper index levels, that sadly managed to get this case wrong as well :-(. Fixing it is a lot easier fortunately.

Per bug #16388 from Charles Offenbacher. Back-patch to 9.6 where phrase search was introduced.

Discussion: https://postgr.es/m/16388-98cffba38d0b7e6e@postgresql.org

Diffstat (limited to 'src/backend/utils/adt/tsginidx.c')
-rw-r--r-- src/backend/utils/adt/tsginidx.c 23
1 files changed, 20 insertions, 3 deletions
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 3e0a44459ac..22225a33522 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -210,6 +210,11 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
/*
* Evaluate tsquery boolean expression using ternary logic.
+ *
+ * Note: the reason we can't use TS_execute() for this is that its API
+ * for the checkcondition callback doesn't allow a MAYBE result to be
+ * returned, but we might have MAYBEs in the gcv->check array.
+ * Perhaps we should change that API.
*/
static GinTernaryValue
TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
@@ -230,9 +235,19 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
switch (curitem->qoperator.oper)
{
case OP_NOT:
- /* In phrase search, always return MAYBE since we lack positions */
+
+ /*
+ * Below a phrase search, force NOT's result to MAYBE. We cannot
+ * invert a TRUE result from the subexpression to FALSE, since
+ * TRUE only says that the subexpression matches somewhere, not
+ * that it matches everywhere, so there might be positions where
+ * the NOT will match. We could invert FALSE to TRUE, but there's
+ * little point in distinguishing TRUE from MAYBE, since a recheck
+ * will have been forced already.
+ */
if (in_phrase)
return GIN_MAYBE;
+
result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (result == GIN_MAYBE)
return result;
@@ -242,7 +257,8 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
/*
* GIN doesn't contain any information about positions, so treat
- * OP_PHRASE as OP_AND with recheck requirement
+ * OP_PHRASE as OP_AND with recheck requirement, and always
+ * reporting MAYBE not TRUE.
*/
*(gcv->need_recheck) = true;
/* Pass down in_phrase == true in case there's a NOT below */
@@ -258,7 +274,8 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (val2 == GIN_FALSE)
return GIN_FALSE;
- if (val1 == GIN_TRUE && val2 == GIN_TRUE)
+ if (val1 == GIN_TRUE && val2 == GIN_TRUE &&
+ curitem->qoperator.oper != OP_PHRASE)
return GIN_TRUE;
else
return GIN_MAYBE;