From 1e16a8107db9a50435b39e09c6f9c52c45e63e1a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 10 Apr 2011 18:02:17 -0400 Subject: Teach regular expression operators to honor collations. This involves getting the character classification and case-folding functions in the regex library to use the collations infrastructure. Most of this work had been done already in connection with the upper/lower and LIKE logic, so it was a simple matter of transposition. While at it, split out these functions into a separate source file regc_pg_locale.c, so that they can be correctly labeled with the Postgres project's license rather than the Scriptics license. These functions are 100% Postgres-written code whereas what remains in regc_locale.c is still mostly not ours, so lumping them both under the same copyright notice was getting more and more misleading. --- src/backend/regex/regc_locale.c | 165 ---------------------------------------- 1 file changed, 165 deletions(-) (limited to 'src/backend/regex/regc_locale.c') diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c index 4f891973643..0f70931b13e 100644 --- a/src/backend/regex/regc_locale.c +++ b/src/backend/regex/regc_locale.c @@ -350,171 +350,6 @@ static const struct cname }; -/* - * ctype functions adapted to work on pg_wchar (a/k/a chr) - * - * When working in UTF8 encoding, we use the <wctype.h> functions if - * available. This assumes that every platform uses Unicode codepoints - * directly as the wchar_t representation of Unicode. On some platforms - * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. - * - * In all other encodings, we use the <ctype.h> functions for pg_wchar - * values up to 255, and punt for values above that. This is only 100% - * correct in single-byte encodings such as LATINn. However, non-Unicode - * multibyte encodings are mostly Far Eastern character sets for which the - * properties being tested here aren't relevant for higher code values anyway. 
- * - * NB: the coding here assumes pg_wchar is an unsigned type. - */ - -static int -pg_wc_isdigit(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit((wint_t) c); - } -#endif - return (c <= (pg_wchar) UCHAR_MAX && isdigit((unsigned char) c)); -} - -static int -pg_wc_isalpha(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha((wint_t) c); - } -#endif - return (c <= (pg_wchar) UCHAR_MAX && isalpha((unsigned char) c)); -} - -static int -pg_wc_isalnum(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum((wint_t) c); - } -#endif - return (c <= (pg_wchar) UCHAR_MAX && isalnum((unsigned char) c)); -} - -static int -pg_wc_isupper(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper((wint_t) c); - } -#endif - return (c <= (pg_wchar) UCHAR_MAX && isupper((unsigned char) c)); -} - -static int -pg_wc_islower(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower((wint_t) c); - } -#endif - return (c <= (pg_wchar) UCHAR_MAX && islower((unsigned char) c)); -} - -static int -pg_wc_isgraph(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph((wint_t) c); - } -#endif - return (c <= (pg_wchar) UCHAR_MAX && isgraph((unsigned char) c)); -} - -static int -pg_wc_isprint(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint((wint_t) c); - } -#endif - 
return (c <= (pg_wchar) UCHAR_MAX && isprint((unsigned char) c)); -} - -static int -pg_wc_ispunct(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct((wint_t) c); - } -#endif - return (c <= (pg_wchar) UCHAR_MAX && ispunct((unsigned char) c)); -} - -static int -pg_wc_isspace(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace((wint_t) c); - } -#endif - return (c <= (pg_wchar) UCHAR_MAX && isspace((unsigned char) c)); -} - -static pg_wchar -pg_wc_toupper(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper((wint_t) c); - } -#endif - if (c <= (pg_wchar) UCHAR_MAX) - return toupper((unsigned char) c); - return c; -} - -static pg_wchar -pg_wc_tolower(pg_wchar c) -{ -#ifdef USE_WIDE_UPPER_LOWER - if (GetDatabaseEncoding() == PG_UTF8) - { - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower((wint_t) c); - } -#endif - if (c <= (pg_wchar) UCHAR_MAX) - return tolower((unsigned char) c); - return c; -} - - /* * element - map collating-element name to celt */ -- cgit v1.2.3