diff options
author | Peter Eisentraut <peter_e@gmx.net> | 2003-05-15 15:50:21 +0000 |
---|---|---|
committer | Peter Eisentraut <peter_e@gmx.net> | 2003-05-15 15:50:21 +0000 |
commit | 2c0556068fc308ed9cce06c85de7e42305d34b86 (patch) | |
tree | 2c9f5561bdc7b660e2fbc5eb2dd67a24f7a654f8 /src/backend | |
parent | 2a2f6cfa3983e6834299857c80bc07d32d1e019a (diff) |
Indexing support for pattern matching operations via separate operator
class when lc_collate is not C.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/access/nbtree/nbtcompare.c | 11 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 81 | ||||
-rw-r--r-- | src/backend/utils/adt/name.c | 61 | ||||
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 121 | ||||
-rw-r--r-- | src/backend/utils/adt/varlena.c | 145 |
5 files changed, 278 insertions, 141 deletions
diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c index 2823ee4207d..f8c479677f9 100644 --- a/src/backend/access/nbtree/nbtcompare.c +++ b/src/backend/access/nbtree/nbtcompare.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.44 2002/06/20 20:29:25 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.45 2003/05/15 15:50:18 petere Exp $ * * NOTES * @@ -149,3 +149,12 @@ btnamecmp(PG_FUNCTION_ARGS) PG_RETURN_INT32(strncmp(NameStr(*a), NameStr(*b), NAMEDATALEN)); } + +Datum +btname_pattern_cmp(PG_FUNCTION_ARGS) +{ + Name a = PG_GETARG_NAME(0); + Name b = PG_GETARG_NAME(1); + + PG_RETURN_INT32(memcmp(NameStr(*a), NameStr(*b), NAMEDATALEN)); +} diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 8254c6b0391..e8ba0b67c11 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.137 2003/05/13 04:38:58 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.138 2003/05/15 15:50:18 petere Exp $ * *------------------------------------------------------------------------- */ @@ -1797,14 +1797,13 @@ match_special_index_operator(Expr *clause, Oid opclass, case OID_VARCHAR_LIKE_OP: case OID_NAME_LIKE_OP: /* the right-hand const is type text for all of these */ - if (locale_is_like_safe()) - isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like, - &prefix, &rest) != Pattern_Prefix_None; + isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like, + &prefix, &rest) != Pattern_Prefix_None; break; case OID_BYTEA_LIKE_OP: isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like, - &prefix, &rest) != Pattern_Prefix_None; + &prefix, &rest) != Pattern_Prefix_None; break; case OID_TEXT_ICLIKE_OP: @@ -1812,9 +1811,8 @@ match_special_index_operator(Expr *clause, Oid opclass, case OID_VARCHAR_ICLIKE_OP: case OID_NAME_ICLIKE_OP: /* the right-hand const is type text for all of these */ - if (locale_is_like_safe()) - isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, - &prefix, &rest) != Pattern_Prefix_None; + isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, + &prefix, &rest) != Pattern_Prefix_None; break; case OID_TEXT_REGEXEQ_OP: @@ -1822,9 +1820,8 @@ match_special_index_operator(Expr *clause, Oid opclass, case OID_VARCHAR_REGEXEQ_OP: case OID_NAME_REGEXEQ_OP: /* the right-hand const is type text for all of these */ - if (locale_is_like_safe()) - isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex, - &prefix, &rest) != Pattern_Prefix_None; + isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex, + &prefix, &rest) != Pattern_Prefix_None; break; case OID_TEXT_ICREGEXEQ_OP: @@ -1832,9 +1829,8 @@ match_special_index_operator(Expr *clause, Oid opclass, case OID_VARCHAR_ICREGEXEQ_OP: case OID_NAME_ICREGEXEQ_OP: /* the right-hand const is type text for all of these */ - if (locale_is_like_safe()) - isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, - &prefix, &rest) != Pattern_Prefix_None; + isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, + &prefix, &rest) != Pattern_Prefix_None; break; case OID_INET_SUB_OP: @@ -1867,42 +1863,53 @@ match_special_index_operator(Expr *clause, Oid opclass, case OID_TEXT_ICLIKE_OP: case OID_TEXT_REGEXEQ_OP: case OID_TEXT_ICREGEXEQ_OP: - if (!op_in_opclass(find_operator(">=", TEXTOID), opclass) || - !op_in_opclass(find_operator("<", TEXTOID), opclass)) - isIndexable = false; + if (lc_collate_is_c()) + isIndexable = (op_in_opclass(find_operator(">=", TEXTOID), opclass) + && op_in_opclass(find_operator("<", TEXTOID), opclass)); + else + isIndexable = (op_in_opclass(find_operator("~>=~", TEXTOID), opclass) + && op_in_opclass(find_operator("~<~", TEXTOID), opclass)); break; case OID_BYTEA_LIKE_OP: - if (!op_in_opclass(find_operator(">=", BYTEAOID), opclass) || - !op_in_opclass(find_operator("<", BYTEAOID), opclass)) - isIndexable = false; + isIndexable = (op_in_opclass(find_operator(">=", BYTEAOID), opclass) + && op_in_opclass(find_operator("<", BYTEAOID), opclass)); break; case OID_BPCHAR_LIKE_OP: case OID_BPCHAR_ICLIKE_OP: case OID_BPCHAR_REGEXEQ_OP: case OID_BPCHAR_ICREGEXEQ_OP: - if (!op_in_opclass(find_operator(">=", BPCHAROID), opclass) || - !op_in_opclass(find_operator("<", BPCHAROID), opclass)) - isIndexable = false; + if (lc_collate_is_c()) + isIndexable = (op_in_opclass(find_operator(">=", BPCHAROID), opclass) + && op_in_opclass(find_operator("<", BPCHAROID), opclass)); + else + isIndexable = (op_in_opclass(find_operator("~>=~", BPCHAROID), opclass) + && op_in_opclass(find_operator("~<~", BPCHAROID), opclass)); break; case OID_VARCHAR_LIKE_OP: case OID_VARCHAR_ICLIKE_OP: case OID_VARCHAR_REGEXEQ_OP: case OID_VARCHAR_ICREGEXEQ_OP: - if (!op_in_opclass(find_operator(">=", VARCHAROID), opclass) || - !op_in_opclass(find_operator("<", VARCHAROID), opclass)) - isIndexable = false; + if (lc_collate_is_c()) + isIndexable = (op_in_opclass(find_operator(">=", VARCHAROID), opclass) + && op_in_opclass(find_operator("<", VARCHAROID), opclass)); + else + isIndexable = (op_in_opclass(find_operator("~>=~", VARCHAROID), opclass) + && op_in_opclass(find_operator("~<~", VARCHAROID), opclass)); break; case OID_NAME_LIKE_OP: case OID_NAME_ICLIKE_OP: case OID_NAME_REGEXEQ_OP: case OID_NAME_ICREGEXEQ_OP: - if (!op_in_opclass(find_operator(">=", NAMEOID), opclass) || - !op_in_opclass(find_operator("<", NAMEOID), opclass)) - isIndexable = false; + if (lc_collate_is_c()) + isIndexable = (op_in_opclass(find_operator(">=", NAMEOID), opclass) + && op_in_opclass(find_operator("<", NAMEOID), opclass)); + else + isIndexable = (op_in_opclass(find_operator("~>=~", NAMEOID), opclass) + && op_in_opclass(find_operator("~<~", NAMEOID), opclass)); break; case OID_INET_SUB_OP: @@ -2039,6 +2046,7 @@ prefix_quals(Node *leftop, Oid expr_op, List *result; Oid datatype; Oid oproid; + const char *oprname; char *prefix; Const *con; Expr *expr; @@ -2098,9 +2106,10 @@ prefix_quals(Node *leftop, Oid expr_op, */ if (pstatus == Pattern_Prefix_Exact) { - oproid = find_operator("=", datatype); + oprname = (datatype == BYTEAOID || lc_collate_is_c() ? "=" : "~=~"); + oproid = find_operator(oprname, datatype); if (oproid == InvalidOid) - elog(ERROR, "prefix_quals: no = operator for type %u", datatype); + elog(ERROR, "prefix_quals: no operator %s for type %u", oprname, datatype); con = string_to_const(prefix, datatype); expr = make_opclause(oproid, BOOLOID, false, (Expr *) leftop, (Expr *) con); @@ -2113,9 +2122,10 @@ prefix_quals(Node *leftop, Oid expr_op, * * We can always say "x >= prefix". */ - oproid = find_operator(">=", datatype); + oprname = (datatype == BYTEAOID || lc_collate_is_c() ? ">=" : "~>=~"); + oproid = find_operator(oprname, datatype); if (oproid == InvalidOid) - elog(ERROR, "prefix_quals: no >= operator for type %u", datatype); + elog(ERROR, "prefix_quals: no operator %s for type %u", oprname, datatype); con = string_to_const(prefix, datatype); expr = make_opclause(oproid, BOOLOID, false, (Expr *) leftop, (Expr *) con); @@ -2129,9 +2139,10 @@ prefix_quals(Node *leftop, Oid expr_op, greaterstr = make_greater_string(con); if (greaterstr) { - oproid = find_operator("<", datatype); + oprname = (datatype == BYTEAOID || lc_collate_is_c() ? "<" : "~<~"); + oproid = find_operator(oprname, datatype); if (oproid == InvalidOid) - elog(ERROR, "prefix_quals: no < operator for type %u", datatype); + elog(ERROR, "prefix_quals: no operator %s for type %u", oprname, datatype); expr = make_opclause(oproid, BOOLOID, false, (Expr *) leftop, (Expr *) greaterstr); result = lappend(result, expr); diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c index b7a56cb1cb1..37dca0b0c63 100644 --- a/src/backend/utils/adt/name.c +++ b/src/backend/utils/adt/name.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/name.c,v 1.45 2003/05/09 21:19:49 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/name.c,v 1.46 2003/05/15 15:50:18 petere Exp $ * *------------------------------------------------------------------------- */ @@ -182,6 +182,65 @@ namege(PG_FUNCTION_ARGS) } +/* + * comparison routines for LIKE indexing support + */ + +Datum +name_pattern_eq(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0); +} + +Datum +name_pattern_ne(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0); +} + +Datum +name_pattern_lt(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) < 0); +} + +Datum +name_pattern_le(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) <= 0); +} + +Datum +name_pattern_gt(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) > 0); +} + +Datum +name_pattern_ge(PG_FUNCTION_ARGS) +{ + Name arg1 = PG_GETARG_NAME(0); + Name arg2 = PG_GETARG_NAME(1); + + PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) >= 0); +} + + /* (see char.c for comparison/operation routines) */ int diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 729d085c3ca..5ff4b1931da 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.136 2003/04/16 04:37:58 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.137 2003/05/15 15:50:18 petere Exp $ * *------------------------------------------------------------------------- */ @@ -180,8 +180,6 @@ static void get_join_vars(List *args, Var **var1, Var **var2); static Selectivity prefix_selectivity(Query *root, Var *var, Oid vartype, Const *prefix); static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype); -static bool string_lessthan(const char *str1, const char *str2, - Oid datatype); static Oid find_operator(const char *opname, Oid datatype); static Datum string_to_datum(const char *str, Oid datatype); static Const *string_to_const(const char *str, Oid datatype); @@ -3619,51 +3617,21 @@ pattern_selectivity(Const *patt, Pattern_Type ptype) /* - * We want to test whether the database's LC_COLLATE setting is safe for - * LIKE/regexp index optimization. + * Try to generate a string greater than the given string or any + * string it is a prefix of. If successful, return a palloc'd string; + * else return NULL. * * The key requirement here is that given a prefix string, say "foo", * we must be able to generate another string "fop" that is greater - * than all strings "foobar" starting with "foo". Unfortunately, a - * non-C locale may have arbitrary collation rules in which "fop" > - * "foo" is not sufficient to ensure "fop" > "foobar". Until we can - * come up with a more bulletproof way of generating the upper-bound - * string, the optimization is disabled in all non-C locales. + * than all strings "foobar" starting with "foo". * - * (In theory, locales other than C may be LIKE-safe so this function - * could be different from lc_collate_is_c(), but in a different - * theory, non-C locales are completely unpredictable so it's unlikely - * to happen.) + * If we max out the righthand byte, truncate off the last character + * and start incrementing the next. For example, if "z" were the last + * character in the sort order, then we could produce "foo" as a + * string greater than "fonz". * - * Be sure to maintain the correspondence with the code in initdb. - */ -bool -locale_is_like_safe(void) -{ - return lc_collate_is_c(); -} - -/* - * Try to generate a string greater than the given string or any string it is - * a prefix of. If successful, return a palloc'd string; else return NULL. - * - * To work correctly in non-ASCII locales with weird collation orders, - * we cannot simply increment "foo" to "fop" --- we have to check whether - * we actually produced a string greater than the given one. If not, - * increment the righthand byte again and repeat. If we max out the righthand - * byte, truncate off the last character and start incrementing the next. - * For example, if "z" were the last character in the sort order, then we - * could produce "foo" as a string greater than "fonz". - * - * This could be rather slow in the worst case, but in most cases we won't - * have to try more than one or two strings before succeeding. - * - * XXX this is actually not sufficient, since it only copes with the case - * where individual characters collate in an order different from their - * numeric code assignments. It does not handle cases where there are - * cross-character effects, such as specially sorted digraphs, multiple - * sort passes, etc. For now, we just shut down the whole thing in locales - * that do such things :-( + * This could be rather slow in the worst case, but in most cases we + * won't have to try more than one or two strings before succeeding. */ Const * make_greater_string(const Const *str_const) @@ -3699,18 +3667,16 @@ make_greater_string(const Const *str_const) /* * Try to generate a larger string by incrementing the last byte. */ - while (*lastchar < (unsigned char) 255) + if (*lastchar < (unsigned char) 255) { + Const *workstr_const; + (*lastchar)++; - if (string_lessthan(str, workstr, datatype)) - { - /* Success! */ - Const *workstr_const = string_to_const(workstr, datatype); + workstr_const = string_to_const(workstr, datatype); - pfree(str); - pfree(workstr); - return workstr_const; - } + pfree(str); + pfree(workstr); + return workstr_const; } /* restore last byte so we don't confuse pg_mbcliplen */ @@ -3736,57 +3702,6 @@ make_greater_string(const Const *str_const) return (Const *) NULL; } -/* - * Test whether two strings are "<" according to the rules of the given - * datatype. We do this the hard way, ie, actually calling the type's - * "<" operator function, to ensure we get the right result... - */ -static bool -string_lessthan(const char *str1, const char *str2, Oid datatype) -{ - Datum datum1 = string_to_datum(str1, datatype); - Datum datum2 = string_to_datum(str2, datatype); - bool result; - - switch (datatype) - { - case TEXTOID: - result = DatumGetBool(DirectFunctionCall2(text_lt, - datum1, datum2)); - break; - - case BPCHAROID: - result = DatumGetBool(DirectFunctionCall2(bpcharlt, - datum1, datum2)); - break; - - case VARCHAROID: - result = DatumGetBool(DirectFunctionCall2(varcharlt, - datum1, datum2)); - break; - - case NAMEOID: - result = DatumGetBool(DirectFunctionCall2(namelt, - datum1, datum2)); - break; - - case BYTEAOID: - result = DatumGetBool(DirectFunctionCall2(bytealt, - datum1, datum2)); - break; - - default: - elog(ERROR, "string_lessthan: unexpected datatype %u", datatype); - result = false; - break; - } - - pfree(DatumGetPointer(datum1)); - pfree(DatumGetPointer(datum2)); - - return result; -} - /* See if there is a binary op of the given name for the given datatype */ /* NB: we assume that only built-in system operators are searched for */ static Oid diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 2a5f97ff028..6be21d241f1 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.97 2003/05/09 15:44:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.98 2003/05/15 15:50:19 petere Exp $ * *------------------------------------------------------------------------- */ @@ -1050,6 +1050,149 @@ text_smaller(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(result); } + +/* + * The following operators support character-by-character comparison + * of text data types, to allow building indexes suitable for LIKE + * clauses. + */ + +static int +internal_text_pattern_compare(text *arg1, text *arg2) +{ + int result; + + result = memcmp(VARDATA(arg1), VARDATA(arg2), + Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ); + if (result != 0) + return result; + else if (VARSIZE(arg1) < VARSIZE(arg2)) + return -1; + else if (VARSIZE(arg1) > VARSIZE(arg2)) + return 1; + else + return 0; +} + + +Datum +text_pattern_lt(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_P(0); + text *arg2 = PG_GETARG_TEXT_P(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result < 0); +} + + +Datum +text_pattern_le(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_P(0); + text *arg2 = PG_GETARG_TEXT_P(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result <= 0); +} + + +Datum +text_pattern_eq(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_P(0); + text *arg2 = PG_GETARG_TEXT_P(1); + int result; + + if (VARSIZE(arg1) != VARSIZE(arg2)) + result = 1; + else + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result == 0); +} + + +Datum +text_pattern_ge(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_P(0); + text *arg2 = PG_GETARG_TEXT_P(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result >= 0); +} + + +Datum +text_pattern_gt(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_P(0); + text *arg2 = PG_GETARG_TEXT_P(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result > 0); +} + + +Datum +text_pattern_ne(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_P(0); + text *arg2 = PG_GETARG_TEXT_P(1); + int result; + + if (VARSIZE(arg1) != VARSIZE(arg2)) + result = 1; + else + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result != 0); +} + + +Datum +bttext_pattern_cmp(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_P(0); + text *arg2 = PG_GETARG_TEXT_P(1); + int result; + + result = internal_text_pattern_compare(arg1, arg2); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + + /*------------------------------------------------------------- * byteaoctetlen * |