diff options
Diffstat (limited to 'src/backend/utils/adt/like.c')
-rw-r--r-- | src/backend/utils/adt/like.c | 470 |
1 files changed, 356 insertions, 114 deletions
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 5a7b8473392..058fb1d9656 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.37 2000/07/07 21:12:50 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.38 2000/08/06 18:05:41 thomas Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,97 +20,222 @@
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 
-static bool like(pg_wchar * text, pg_wchar * p);
+
+#define LIKE_TRUE						1
+#define LIKE_FALSE						0
+#define LIKE_ABORT						(-1)
+
+
+static int MatchText(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e);
+static int MatchTextLower(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e);
+
 
 /*
  *	interface routines called by the function manager
  */
 
-/*
-   fixedlen_like:
+Datum
+namelike(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
 
-   a generic fixed length like routine
-		 s		- the string to match against (not necessarily null-terminated)
-		 p		   - the pattern (as text*)
-		 charlen   - the length of the string
-*/
-static bool
-fixedlen_like(char *s, text *p, int charlen)
-{
-	pg_wchar   *sterm,
-			   *pterm;
-	bool		result;
-	int			len;
-
-	/* be sure sterm is null-terminated */
-#ifdef MULTIBYTE
-	sterm = (pg_wchar *) palloc((charlen + 1) * sizeof(pg_wchar));
-	(void) pg_mb2wchar_with_len((unsigned char *) s, sterm, charlen);
-#else
-	sterm = (char *) palloc(charlen + 1);
-	memcpy(sterm, s, charlen);
-	sterm[charlen] = '\0';
-#endif
+	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 NULL)
+				   == LIKE_TRUE);
+}
 
-	/*
-	 * p is a text, not a string so we have to make a string
-	 * from the vl_data field of the struct.
-	 */
+Datum
+namenlike(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
 
-	/* palloc the length of the text + the null character */
-	len = VARSIZE(p) - VARHDRSZ;
-#ifdef MULTIBYTE
-	pterm = (pg_wchar *) palloc((len + 1) * sizeof(pg_wchar));
-	(void) pg_mb2wchar_with_len((unsigned char *) VARDATA(p), pterm, len);
-#else
-	pterm = (char *) palloc(len + 1);
-	memcpy(pterm, VARDATA(p), len);
-	*(pterm + len) = '\0';
-#endif
+	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 NULL)
+				   != LIKE_TRUE);
+}
 
-	/* do the regexp matching */
-	result = like(sterm, pterm);
+Datum
+namelike_escape(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
 
-	pfree(sterm);
-	pfree(pterm);
+	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   == LIKE_TRUE);
+}
 
-	return result;
+Datum
+namenlike_escape(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   != LIKE_TRUE);
 }
 
 Datum
-namelike(PG_FUNCTION_ARGS)
+textlike(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+
+	PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 NULL)
+				   == LIKE_TRUE);
+}
+
+Datum
+textnlike(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+
+	PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 NULL)
+				   != LIKE_TRUE);
+}
+
+Datum
+textlike_escape(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   == LIKE_TRUE);
+}
+
+Datum
+textnlike_escape(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   != LIKE_TRUE);
+}
+
+/*
+ * Case-insensitive versions
+ */
+
+Datum
+inamelike(PG_FUNCTION_ARGS)
 {
 	Name		n = PG_GETARG_NAME(0);
 	text	   *p = PG_GETARG_TEXT_P(1);
 
-	PG_RETURN_BOOL(fixedlen_like(NameStr(*n), p, strlen(NameStr(*n))));
+	PG_RETURN_BOOL(MatchTextLower(NameStr(*n), strlen(NameStr(*n)),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  NULL)
+				   == LIKE_TRUE);
 }
 
 Datum
-namenlike(PG_FUNCTION_ARGS)
+inamenlike(PG_FUNCTION_ARGS)
 {
 	Name		n = PG_GETARG_NAME(0);
 	text	   *p = PG_GETARG_TEXT_P(1);
 
-	PG_RETURN_BOOL(! fixedlen_like(NameStr(*n), p, strlen(NameStr(*n))));
+	PG_RETURN_BOOL(MatchTextLower(NameStr(*n), strlen(NameStr(*n)),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  NULL)
+				   != LIKE_TRUE);
 }
 
 Datum
-textlike(PG_FUNCTION_ARGS)
+inamelike_escape(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchTextLower(NameStr(*n), strlen(NameStr(*n)),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   == LIKE_TRUE);
+}
+
+Datum
+inamenlike_escape(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchTextLower(NameStr(*n), strlen(NameStr(*n)),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   != LIKE_TRUE);
+}
+
+Datum
+itextlike(PG_FUNCTION_ARGS)
 {
 	text	   *s = PG_GETARG_TEXT_P(0);
 	text	   *p = PG_GETARG_TEXT_P(1);
 
-	PG_RETURN_BOOL(fixedlen_like(VARDATA(s), p, VARSIZE(s) - VARHDRSZ));
+	PG_RETURN_BOOL(MatchTextLower(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  NULL)
+				   == LIKE_TRUE);
 }
 
 Datum
-textnlike(PG_FUNCTION_ARGS)
+itextnlike(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+
+	PG_RETURN_BOOL(MatchTextLower(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  NULL)
+				   != LIKE_TRUE);
+}
+
+Datum
+itextlike_escape(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchTextLower(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   == LIKE_TRUE);
+}
+
+Datum
+itextnlike_escape(PG_FUNCTION_ARGS)
 {
 	text	   *s = PG_GETARG_TEXT_P(0);
 	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
 
-	PG_RETURN_BOOL(! fixedlen_like(VARDATA(s), p, VARSIZE(s) - VARHDRSZ));
+	PG_RETURN_BOOL(MatchTextLower(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   != LIKE_TRUE);
 }
 
 
@@ -136,12 +261,16 @@ textnlike(PG_FUNCTION_ARGS)
 **	LIKE <pattern> ESCAPE <escape character>. We are a small operation
 **	so we force you to use '\'. - ay 7/95]
 **
+** OK, we now support the SQL9x LIKE <pattern> ESCAPE <char> syntax.
+** We should kill the backslash escaping mechanism since it is non-standard
+** and undocumented afaik.
+** The code is rewritten to avoid requiring null-terminated strings,
+** which in turn allows us to leave out some memcpy() operations.
+** This code should be faster and take less memory, but no promises...
+** - thomas 2000-08-06
+**
 */
 
-#define LIKE_TRUE						1
-#define LIKE_FALSE						0
-#define LIKE_ABORT						(-1)
-
 /*--------------------
  *	Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
  *
@@ -153,69 +282,97 @@ textnlike(PG_FUNCTION_ARGS)
  * pattern either, so an upper-level % scan can stop scanning now.
  *--------------------
  */
+
+#define NextChar(p, plen) (p)++, (plen)--
+
 static int
-DoMatch(pg_wchar * text, pg_wchar * p)
+MatchText(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e)
 {
-	for (; *p && *text; text ++, p++)
+	/* Fast path for match-everything pattern
+	 * Include weird case of escape character as a percent sign or underscore,
+	 * when presumably that wildcard character becomes a literal.
+	 */
+	if ((plen == 1) && (*p == '%')
+		&& ! ((e != NULL) && (*e == '%')))
+		return LIKE_TRUE;
+
+	while ((tlen > 0) && (plen > 0))
 	{
-		switch (*p)
+		/* If an escape character was specified and we find it here in the pattern,
+		 * then we'd better have an exact match for the next character.
+		 */
+		if ((e != NULL) && (*p == *e))
 		{
-			case '\\':
-				/* Literal match with following character. */
-				p++;
-				/* FALLTHROUGH */
-			default:
-				if (*text !=*p)
-					return LIKE_FALSE;
-				break;
-			case '_':
-				/* Match any single character. */
-				break;
-			case '%':
-				/* %% is the same as % according to the SQL standard */
-				/* Advance past all %'s */
-				while (*p == '%')
-					p++;
-				/* Trailing percent matches everything. */
-				if (*p == '\0')
-					return LIKE_TRUE;
-
-				/*
-				 * Otherwise, scan for a text position at which we can
-				 * match the rest of the pattern.
-				 */
-				for (; *text; text ++)
-				{
+			NextChar(p, plen);
+			if ((plen <= 0) || (*t != *p))
+				return LIKE_FALSE;
+		}
+		else
+		{
+			switch (*p)
+			{
+				case '\\':
+					/* Literal match with following character. */
+					NextChar(p, plen);
+					/* FALLTHROUGH */
+				default:
+					if ((plen <= 0) || (*t != *p))
+						return LIKE_FALSE;
+					break;
+				case '_':
+					/* Match any single character. */
+					break;
+				case '%':
+					/* %% is the same as % according to the SQL standard */
+					/* Advance past all %'s */
+					while ((plen > 0) && (*p == '%'))
+						NextChar(p, plen);
+					/* Trailing percent matches everything. */
+					if (plen <= 0)
+						return LIKE_TRUE;
 
 					/*
-					 * Optimization to prevent most recursion: don't
-					 * recurse unless first pattern char might match this
-					 * text char.
+					 * Otherwise, scan for a text position at which we can
+					 * match the rest of the pattern.
 					 */
-					if (*text == *p || *p == '\\' || *p == '_')
+					while (tlen > 0)
 					{
-						int			matched = DoMatch(text, p);
+						/*
+						 * Optimization to prevent most recursion: don't
+						 * recurse unless first pattern char might match this
+						 * text char.
+						 */
+						if ((*t == *p) || (*p == '\\') || (*p == '_')
+							|| ((e != NULL) && (*p == *e)))
+						{
+							int matched = MatchText(t, tlen, p, plen, e);
+
+							if (matched != LIKE_FALSE)
+								return matched;		/* TRUE or ABORT */
+						}
 
-						if (matched != LIKE_FALSE)
-							return matched;		/* TRUE or ABORT */
+						NextChar(t, tlen);
 					}
-				}
 
-				/*
-				 * End of text with no match, so no point in trying later
-				 * places to start matching this pattern.
-				 */
-				return LIKE_ABORT;
+					/*
+					 * End of text with no match, so no point in trying later
+					 * places to start matching this pattern.
+					 */
+					return LIKE_ABORT;
+			}
 		}
+
+		NextChar(t, tlen);
+		NextChar(p, plen);
 	}
 
-	if (*text !='\0')
+	if (tlen > 0)
 		return LIKE_FALSE;		/* end of pattern, but not of text */
 
 	/* End of input string.  Do we have matching pattern remaining? */
-	while (*p == '%')			/* allow multiple %'s at end of pattern */
-		p++;
-	if (*p == '\0')
+	while ((plen > 0) && (*p == '%'))	/* allow multiple %'s at end of pattern */
+		NextChar(p, plen);
+	if (plen <= 0)
 		return LIKE_TRUE;
 
 	/*
@@ -223,16 +380,101 @@ DoMatch(pg_wchar * text, pg_wchar * p)
 	 * start matching this pattern.
 	 */
 	return LIKE_ABORT;
-}
+} /* MatchText() */
 
-/*
-** User-level routine.  Returns TRUE or FALSE.
-*/
-static bool
-like(pg_wchar * text, pg_wchar * p)
+static int
+MatchTextLower(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e)
 {
-	/* Fast path for match-everything pattern */
-	if (p[0] == '%' && p[1] == '\0')
-		return true;
-	return DoMatch(text, p) == LIKE_TRUE;
-}
+	/* Fast path for match-everything pattern
+	 * Include weird case of escape character as a percent sign or underscore,
+	 * when presumably that wildcard character becomes a literal.
+	 */
+	if ((plen == 1) && (*p == '%')
+		&& ! ((e != NULL) && (*e == '%')))
+		return LIKE_TRUE;
+
+	while ((tlen > 0) && (plen > 0))
+	{
+		/* If an escape character was specified and we find it here in the pattern,
+		 * then we'd better have an exact match for the next character.
+		 */
+		if ((e != NULL) && (tolower(*p) == tolower(*e)))
+		{
+			NextChar(p, plen);
+			if ((plen <= 0) || (tolower(*t) != tolower(*p)))
+				return LIKE_FALSE;
+		}
+		else
+		{
+			switch (*p)
+			{
+				case '\\':
+					/* Literal match with following character. */
+					NextChar(p, plen);
+					/* FALLTHROUGH */
+				default:
+					if ((plen <= 0) || (tolower(*t) != tolower(*p)))
+						return LIKE_FALSE;
+					break;
+				case '_':
+					/* Match any single character. */
+					break;
+				case '%':
+					/* %% is the same as % according to the SQL standard */
+					/* Advance past all %'s */
+					while ((plen > 0) && (*p == '%'))
+						NextChar(p, plen);
+					/* Trailing percent matches everything. */
+					if (plen <= 0)
+						return LIKE_TRUE;
+
+					/*
+					 * Otherwise, scan for a text position at which we can
+					 * match the rest of the pattern.
+					 */
+					while (tlen > 0)
+					{
+						/*
+						 * Optimization to prevent most recursion: don't
+						 * recurse unless first pattern char might match this
+						 * text char.
+						 */
+						if ((tolower(*t) == tolower(*p)) || (*p == '\\') || (*p == '_')
+							|| ((e != NULL) && (tolower(*p) == tolower(*e))))
+						{
+							int matched = MatchTextLower(t, tlen, p, plen, e);
+
+							if (matched != LIKE_FALSE)
+								return matched;		/* TRUE or ABORT */
+						}
+
+						NextChar(t, tlen);
+					}
+
+					/*
+					 * End of text with no match, so no point in trying later
+					 * places to start matching this pattern.
+					 */
+					return LIKE_ABORT;
+			}
+		}
+
+		NextChar(t, tlen);
+		NextChar(p, plen);
+	}
+
+	if (tlen > 0)
+		return LIKE_FALSE;		/* end of pattern, but not of text */
+
+	/* End of input string.  Do we have matching pattern remaining? */
+	while ((plen > 0) && (*p == '%'))	/* allow multiple %'s at end of pattern */
+		NextChar(p, plen);
+	if (plen <= 0)
+		return LIKE_TRUE;
+
+	/*
+	 * End of text with no match, so no point in trying later places to
+	 * start matching this pattern.
+	 */
+	return LIKE_ABORT;
+} /* MatchTextLower() */