diff options
Diffstat (limited to 'src/backend/utils/adt/like_support.c')
-rw-r--r-- | src/backend/utils/adt/like_support.c | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c new file mode 100644 index 00000000000..b001dde5fc7 --- /dev/null +++ b/src/backend/utils/adt/like_support.c @@ -0,0 +1,313 @@ +/*------------------------------------------------------------------------- + * + * like_support.c + * Planner support functions for LIKE, regex, and related operators. + * + * These routines handle special optimization of operators that can be + * used with index scans even though they are not known to the executor's + * indexscan machinery. The key idea is that these operators allow us + * to derive approximate indexscan qual clauses, such that any tuples + * that pass the operator clause itself must also satisfy the simpler + * indexscan condition(s). Then we can use the indexscan machinery + * to avoid scanning as much of the table as we'd otherwise have to, + * while applying the original operator as a qpqual condition to ensure + * we deliver only the tuples we want. (In essence, we're using a regular + * index as if it were a lossy index.) + * + * An example of what we're doing is + * textfield LIKE 'abc%def' + * from which we can generate the indexscanable conditions + * textfield >= 'abc' AND textfield < 'abd' + * which allow efficient scanning of an index on textfield. + * (In reality, character set and collation issues make the transformation + * from LIKE to indexscan limits rather harder than one might think ... + * but that's the basic idea.) + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/like_support.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/stratnum.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/pg_locale.h" +#include "utils/selfuncs.h" + + +static Node *like_regex_support(Node *rawreq, Pattern_Type ptype); +static List *match_pattern_prefix(Node *leftop, + Node *rightop, + Pattern_Type ptype, + Oid expr_coll, + Oid opfamily, + Oid indexcollation); + + +/* + * Planner support functions for LIKE, regex, and related operators + */ +Datum +textlike_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like)); +} + +Datum +texticlike_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC)); +} + +Datum +textregexeq_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex)); +} + +Datum +texticregexeq_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC)); +} + +/* Common code for the above */ +static Node * +like_regex_support(Node *rawreq, Pattern_Type ptype) +{ + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestIndexCondition)) + { + /* Try to convert operator/function call to index conditions */ + SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq; + + /* + * Currently we have no "reverse" match operators with the pattern on + * the left, so we only need consider cases with the indexkey on the + * left. + */ + if (req->indexarg != 0) + return NULL; + + if (is_opclause(req->node)) + { + OpExpr *clause = (OpExpr *) req->node; + + Assert(list_length(clause->args) == 2); + ret = (Node *) + match_pattern_prefix((Node *) linitial(clause->args), + (Node *) lsecond(clause->args), + ptype, + clause->inputcollid, + req->opfamily, + req->indexcollation); + } + else if (is_funcclause(req->node)) /* be paranoid */ + { + FuncExpr *clause = (FuncExpr *) req->node; + + Assert(list_length(clause->args) == 2); + ret = (Node *) + match_pattern_prefix((Node *) linitial(clause->args), + (Node *) lsecond(clause->args), + ptype, + clause->inputcollid, + req->opfamily, + req->indexcollation); + } + } + + return ret; +} + +/* + * match_pattern_prefix + * Try to generate an indexqual for a LIKE or regex operator. + */ +static List * +match_pattern_prefix(Node *leftop, + Node *rightop, + Pattern_Type ptype, + Oid expr_coll, + Oid opfamily, + Oid indexcollation) +{ + List *result; + Const *patt; + Const *prefix; + Pattern_Prefix_Status pstatus; + Oid ldatatype; + Oid rdatatype; + Oid oproid; + Expr *expr; + FmgrInfo ltproc; + Const *greaterstr; + + /* + * Can't do anything with a non-constant or NULL pattern argument. + * + * Note that since we restrict ourselves to cases with a hard constant on + * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry + * about verifying that. + */ + if (!IsA(rightop, Const) || + ((Const *) rightop)->constisnull) + return NIL; + patt = (Const *) rightop; + + /* + * Try to extract a fixed prefix from the pattern. + */ + pstatus = pattern_fixed_prefix(patt, ptype, expr_coll, + &prefix, NULL); + + /* fail if no fixed prefix */ + if (pstatus == Pattern_Prefix_None) + return NIL; + + /* + * Must also check that index's opfamily supports the operators we will + * want to apply. (A hash index, for example, will not support ">=".) + * Currently, only btree and spgist support the operators we need. + * + * Note: actually, in the Pattern_Prefix_Exact case, we only need "=" so a + * hash index would work. Currently it doesn't seem worth checking for + * that, however. + * + * We insist on the opfamily being one of the specific ones we expect, + * else we'd do the wrong thing if someone were to make a reverse-sort + * opfamily with the same operators. + * + * The non-pattern opclasses will not sort the way we need in most non-C + * locales. We can use such an index anyway for an exact match (simple + * equality), but not for prefix-match cases. Note that here we are + * looking at the index's collation, not the expression's collation -- + * this test is *not* dependent on the LIKE/regex operator's collation. + * + * While we're at it, identify the type the comparison constant(s) should + * have, based on the opfamily. + */ + switch (opfamily) + { + case TEXT_BTREE_FAM_OID: + if (!(pstatus == Pattern_Prefix_Exact || + lc_collate_is_c(indexcollation))) + return NIL; + rdatatype = TEXTOID; + break; + + case TEXT_PATTERN_BTREE_FAM_OID: + case TEXT_SPGIST_FAM_OID: + rdatatype = TEXTOID; + break; + + case BPCHAR_BTREE_FAM_OID: + if (!(pstatus == Pattern_Prefix_Exact || + lc_collate_is_c(indexcollation))) + return NIL; + rdatatype = BPCHAROID; + break; + + case BPCHAR_PATTERN_BTREE_FAM_OID: + rdatatype = BPCHAROID; + break; + + case BYTEA_BTREE_FAM_OID: + rdatatype = BYTEAOID; + break; + + default: + return NIL; + } + + /* OK, prepare to create the indexqual(s) */ + ldatatype = exprType(leftop); + + /* + * If necessary, coerce the prefix constant to the right type. The given + * prefix constant is either text or bytea type, therefore the only case + * where we need to do anything is when converting text to bpchar. Those + * two types are binary-compatible, so relabeling the Const node is + * sufficient. + */ + if (prefix->consttype != rdatatype) + { + Assert(prefix->consttype == TEXTOID && + rdatatype == BPCHAROID); + prefix->consttype = rdatatype; + } + + /* + * If we found an exact-match pattern, generate an "=" indexqual. + */ + if (pstatus == Pattern_Prefix_Exact) + { + oproid = get_opfamily_member(opfamily, ldatatype, rdatatype, + BTEqualStrategyNumber); + if (oproid == InvalidOid) + elog(ERROR, "no = operator for opfamily %u", opfamily); + expr = make_opclause(oproid, BOOLOID, false, + (Expr *) leftop, (Expr *) prefix, + InvalidOid, indexcollation); + result = list_make1(expr); + return result; + } + + /* + * Otherwise, we have a nonempty required prefix of the values. + * + * We can always say "x >= prefix". + */ + oproid = get_opfamily_member(opfamily, ldatatype, rdatatype, + BTGreaterEqualStrategyNumber); + if (oproid == InvalidOid) + elog(ERROR, "no >= operator for opfamily %u", opfamily); + expr = make_opclause(oproid, BOOLOID, false, + (Expr *) leftop, (Expr *) prefix, + InvalidOid, indexcollation); + result = list_make1(expr); + + /*------- + * If we can create a string larger than the prefix, we can say + * "x < greaterstr". NB: we rely on make_greater_string() to generate + * a guaranteed-greater string, not just a probably-greater string. + * In general this is only guaranteed in C locale, so we'd better be + * using a C-locale index collation. + *------- + */ + oproid = get_opfamily_member(opfamily, ldatatype, rdatatype, + BTLessStrategyNumber); + if (oproid == InvalidOid) + elog(ERROR, "no < operator for opfamily %u", opfamily); + fmgr_info(get_opcode(oproid), <proc); + greaterstr = make_greater_string(prefix, <proc, indexcollation); + if (greaterstr) + { + expr = make_opclause(oproid, BOOLOID, false, + (Expr *) leftop, (Expr *) greaterstr, + InvalidOid, indexcollation); + result = lappend(result, expr); + } + + return result; +} |