summaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/like_support.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/like_support.c')
-rw-r--r--src/backend/utils/adt/like_support.c313
1 files changed, 313 insertions, 0 deletions
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
new file mode 100644
index 00000000000..b001dde5fc7
--- /dev/null
+++ b/src/backend/utils/adt/like_support.c
@@ -0,0 +1,313 @@
+/*-------------------------------------------------------------------------
+ *
+ * like_support.c
+ * Planner support functions for LIKE, regex, and related operators.
+ *
+ * These routines handle special optimization of operators that can be
+ * used with index scans even though they are not known to the executor's
+ * indexscan machinery. The key idea is that these operators allow us
+ * to derive approximate indexscan qual clauses, such that any tuples
+ * that pass the operator clause itself must also satisfy the simpler
+ * indexscan condition(s). Then we can use the indexscan machinery
+ * to avoid scanning as much of the table as we'd otherwise have to,
+ * while applying the original operator as a qpqual condition to ensure
+ * we deliver only the tuples we want. (In essence, we're using a regular
+ * index as if it were a lossy index.)
+ *
+ * An example of what we're doing is
+ * textfield LIKE 'abc%def'
+ * from which we can generate the indexscanable conditions
+ * textfield >= 'abc' AND textfield < 'abd'
+ * which allow efficient scanning of an index on textfield.
+ * (In reality, character set and collation issues make the transformation
+ * from LIKE to indexscan limits rather harder than one might think ...
+ * but that's the basic idea.)
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/like_support.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/stratnum.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_type.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
+#include "utils/selfuncs.h"
+
+
+static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
+static List *match_pattern_prefix(Node *leftop,
+ Node *rightop,
+ Pattern_Type ptype,
+ Oid expr_coll,
+ Oid opfamily,
+ Oid indexcollation);
+
+
+/*
+ * Planner support functions for LIKE, regex, and related operators
+ */
+Datum
+textlike_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
+}
+
+Datum
+texticlike_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
+}
+
+Datum
+textregexeq_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
+}
+
+Datum
+texticregexeq_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
+}
+
+/* Common code for the above */
+static Node *
+like_regex_support(Node *rawreq, Pattern_Type ptype)
+{
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestIndexCondition))
+ {
+ /* Try to convert operator/function call to index conditions */
+ SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
+
+ /*
+ * Currently we have no "reverse" match operators with the pattern on
+ * the left, so we only need consider cases with the indexkey on the
+ * left.
+ */
+ if (req->indexarg != 0)
+ return NULL;
+
+ if (is_opclause(req->node))
+ {
+ OpExpr *clause = (OpExpr *) req->node;
+
+ Assert(list_length(clause->args) == 2);
+ ret = (Node *)
+ match_pattern_prefix((Node *) linitial(clause->args),
+ (Node *) lsecond(clause->args),
+ ptype,
+ clause->inputcollid,
+ req->opfamily,
+ req->indexcollation);
+ }
+ else if (is_funcclause(req->node)) /* be paranoid */
+ {
+ FuncExpr *clause = (FuncExpr *) req->node;
+
+ Assert(list_length(clause->args) == 2);
+ ret = (Node *)
+ match_pattern_prefix((Node *) linitial(clause->args),
+ (Node *) lsecond(clause->args),
+ ptype,
+ clause->inputcollid,
+ req->opfamily,
+ req->indexcollation);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * match_pattern_prefix
+ * Try to generate an indexqual for a LIKE or regex operator.
+ */
+static List *
+match_pattern_prefix(Node *leftop,
+ Node *rightop,
+ Pattern_Type ptype,
+ Oid expr_coll,
+ Oid opfamily,
+ Oid indexcollation)
+{
+ List *result;
+ Const *patt;
+ Const *prefix;
+ Pattern_Prefix_Status pstatus;
+ Oid ldatatype;
+ Oid rdatatype;
+ Oid oproid;
+ Expr *expr;
+ FmgrInfo ltproc;
+ Const *greaterstr;
+
+ /*
+ * Can't do anything with a non-constant or NULL pattern argument.
+ *
+ * Note that since we restrict ourselves to cases with a hard constant on
+ * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
+ * about verifying that.
+ */
+ if (!IsA(rightop, Const) ||
+ ((Const *) rightop)->constisnull)
+ return NIL;
+ patt = (Const *) rightop;
+
+ /*
+ * Try to extract a fixed prefix from the pattern.
+ */
+ pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
+ &prefix, NULL);
+
+ /* fail if no fixed prefix */
+ if (pstatus == Pattern_Prefix_None)
+ return NIL;
+
+ /*
+ * Must also check that index's opfamily supports the operators we will
+ * want to apply. (A hash index, for example, will not support ">=".)
+ * Currently, only btree and spgist support the operators we need.
+ *
+ * Note: actually, in the Pattern_Prefix_Exact case, we only need "=" so a
+ * hash index would work. Currently it doesn't seem worth checking for
+ * that, however.
+ *
+ * We insist on the opfamily being one of the specific ones we expect,
+ * else we'd do the wrong thing if someone were to make a reverse-sort
+ * opfamily with the same operators.
+ *
+ * The non-pattern opclasses will not sort the way we need in most non-C
+ * locales. We can use such an index anyway for an exact match (simple
+ * equality), but not for prefix-match cases. Note that here we are
+ * looking at the index's collation, not the expression's collation --
+ * this test is *not* dependent on the LIKE/regex operator's collation.
+ *
+ * While we're at it, identify the type the comparison constant(s) should
+ * have, based on the opfamily.
+ */
+ switch (opfamily)
+ {
+ case TEXT_BTREE_FAM_OID:
+ if (!(pstatus == Pattern_Prefix_Exact ||
+ lc_collate_is_c(indexcollation)))
+ return NIL;
+ rdatatype = TEXTOID;
+ break;
+
+ case TEXT_PATTERN_BTREE_FAM_OID:
+ case TEXT_SPGIST_FAM_OID:
+ rdatatype = TEXTOID;
+ break;
+
+ case BPCHAR_BTREE_FAM_OID:
+ if (!(pstatus == Pattern_Prefix_Exact ||
+ lc_collate_is_c(indexcollation)))
+ return NIL;
+ rdatatype = BPCHAROID;
+ break;
+
+ case BPCHAR_PATTERN_BTREE_FAM_OID:
+ rdatatype = BPCHAROID;
+ break;
+
+ case BYTEA_BTREE_FAM_OID:
+ rdatatype = BYTEAOID;
+ break;
+
+ default:
+ return NIL;
+ }
+
+ /* OK, prepare to create the indexqual(s) */
+ ldatatype = exprType(leftop);
+
+ /*
+ * If necessary, coerce the prefix constant to the right type. The given
+ * prefix constant is either text or bytea type, therefore the only case
+ * where we need to do anything is when converting text to bpchar. Those
+ * two types are binary-compatible, so relabeling the Const node is
+ * sufficient.
+ */
+ if (prefix->consttype != rdatatype)
+ {
+ Assert(prefix->consttype == TEXTOID &&
+ rdatatype == BPCHAROID);
+ prefix->consttype = rdatatype;
+ }
+
+ /*
+ * If we found an exact-match pattern, generate an "=" indexqual.
+ */
+ if (pstatus == Pattern_Prefix_Exact)
+ {
+ oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTEqualStrategyNumber);
+ if (oproid == InvalidOid)
+ elog(ERROR, "no = operator for opfamily %u", opfamily);
+ expr = make_opclause(oproid, BOOLOID, false,
+ (Expr *) leftop, (Expr *) prefix,
+ InvalidOid, indexcollation);
+ result = list_make1(expr);
+ return result;
+ }
+
+ /*
+ * Otherwise, we have a nonempty required prefix of the values.
+ *
+ * We can always say "x >= prefix".
+ */
+ oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTGreaterEqualStrategyNumber);
+ if (oproid == InvalidOid)
+ elog(ERROR, "no >= operator for opfamily %u", opfamily);
+ expr = make_opclause(oproid, BOOLOID, false,
+ (Expr *) leftop, (Expr *) prefix,
+ InvalidOid, indexcollation);
+ result = list_make1(expr);
+
+ /*-------
+ * If we can create a string larger than the prefix, we can say
+ * "x < greaterstr". NB: we rely on make_greater_string() to generate
+ * a guaranteed-greater string, not just a probably-greater string.
+ * In general this is only guaranteed in C locale, so we'd better be
+ * using a C-locale index collation.
+ *-------
+ */
+ oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTLessStrategyNumber);
+ if (oproid == InvalidOid)
+ elog(ERROR, "no < operator for opfamily %u", opfamily);
+ fmgr_info(get_opcode(oproid), &ltproc);
+ greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
+ if (greaterstr)
+ {
+ expr = make_opclause(oproid, BOOLOID, false,
+ (Expr *) leftop, (Expr *) greaterstr,
+ InvalidOid, indexcollation);
+ result = lappend(result, expr);
+ }
+
+ return result;
+}