diff options
| -rw-r--r-- | src/backend/tsearch/ts_locale.c | 40 | ||||
| -rw-r--r-- | src/backend/tsearch/wparser_def.c | 71 |
2 files changed, 27 insertions, 84 deletions
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index 4801fe90089..4422f042d12 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -20,45 +20,33 @@ static void tsearch_readline_callback(void *arg); -/* - * The reason these functions use a 3-wchar_t output buffer, not 2 as you - * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be - * getting from char2wchar() is UTF16 not UTF32. A single input character - * may therefore produce a surrogate pair rather than just one wchar_t; - * we also need room for a trailing null. When we do get a surrogate pair, - * we pass just the first code to iswdigit() etc, so that these functions will - * always return false for characters outside the Basic Multilingual Plane. - */ -#define WC_BUF_LEN 3 +/* space for a single character plus a trailing NUL */ +#define WC_BUF_LEN 2 int t_isalpha(const char *ptr) { - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - locale_t mylocale = 0; /* TODO */ + pg_wchar wstr[WC_BUF_LEN]; + int wlen pg_attribute_unused(); - if (clen == 1 || database_ctype_is_c) - return isalpha(TOUCHAR(ptr)); + wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr)); + Assert(wlen <= 1); - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswalpha((wint_t) character[0]); + /* pass single character, or NUL if empty */ + return pg_iswalpha(wstr[0], pg_database_locale()); } int t_isalnum(const char *ptr) { - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isalnum(TOUCHAR(ptr)); + pg_wchar wstr[WC_BUF_LEN]; + int wlen pg_attribute_unused(); - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); + wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr)); + Assert(wlen <= 1); - return iswalnum((wint_t) character[0]); + /* pass single character, or NUL if empty */ + return pg_iswalnum(wstr[0], pg_database_locale()); } diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index e2dd3da3aa3..251a2ae6563 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -243,9 +243,7 @@ typedef struct TParser /* string and position information */ char *str; /* multibyte string */ int lenstr; /* length of mbstring */ - wchar_t *wstr; /* wide character string */ pg_wchar *pgwstr; /* wide character string for C-locale */ - bool usewide; /* State of parse */ int charmaxlen; @@ -293,33 +291,8 @@ TParserInit(char *str, int len) prs->charmaxlen = pg_database_encoding_max_length(); prs->str = str; prs->lenstr = len; - - /* - * Use wide char code only when max encoding length > 1. - */ - if (prs->charmaxlen > 1) - { - locale_t mylocale = 0; /* TODO */ - - prs->usewide = true; - if (database_ctype_is_c) - { - /* - * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could - * be different from sizeof(wchar_t) - */ - prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1)); - pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr); - } - else - { - prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1)); - char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr, - mylocale); - } - } - else - prs->usewide = false; + prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1)); + pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr); prs->state = newTParserPosition(NULL); prs->state->state = TPS_Base; @@ -350,12 +323,9 @@ TParserCopyInit(const TParser *orig) prs->charmaxlen = orig->charmaxlen; prs->str = orig->str + orig->state->posbyte; prs->lenstr = orig->lenstr - orig->state->posbyte; - prs->usewide = orig->usewide; if (orig->pgwstr) prs->pgwstr = orig->pgwstr + orig->state->poschar; - if (orig->wstr) - prs->wstr = orig->wstr + orig->state->poschar; prs->state = newTParserPosition(NULL); prs->state->state = TPS_Base; @@ -379,8 +349,6 @@ TParserClose(TParser *prs) prs->state = ptr; } - if (prs->wstr) - pfree(prs->wstr); if (prs->pgwstr) pfree(prs->pgwstr); @@ -412,13 +380,9 @@ TParserCopyClose(TParser *prs) /* - * Character-type support functions, equivalent to is* macros, but - * working with any possible encodings and locales. Notes: - * - with multibyte encoding and C-locale isw* function may fail - * or give wrong result. - * - multibyte encoding and C-locale often are used for - * Asian languages. - * - if locale is C then we use pgwstr instead of wstr. + * Character-type support functions using the database default locale. If the + * locale is C, and the input character is non-ascii, the value to be returned + * is determined by the 'nonascii' macro argument. */ #define p_iswhat(type, nonascii) \ @@ -426,19 +390,13 @@ TParserCopyClose(TParser *prs) static int \ p_is##type(TParser *prs) \ { \ + pg_locale_t locale = pg_database_locale(); \ + pg_wchar wc; \ Assert(prs->state); \ - if (prs->usewide) \ - { \ - if (prs->pgwstr) \ - { \ - unsigned int c = *(prs->pgwstr + prs->state->poschar); \ - if (c > 0x7f) \ - return nonascii; \ - return is##type(c); \ - } \ - return isw##type(*(prs->wstr + prs->state->poschar)); \ - } \ - return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \ + wc = prs->pgwstr[prs->state->poschar]; \ + if (prs->charmaxlen > 1 && locale->ctype_is_c && wc > 0x7f) \ + return nonascii; \ + return pg_isw##type(wc, pg_database_locale()); \ } \ \ static int \ @@ -703,7 +661,7 @@ p_isspecial(TParser *prs) * Check that only in utf encoding, because other encodings aren't * supported by postgres or even exists. */ - if (GetDatabaseEncoding() == PG_UTF8 && prs->usewide) + if (GetDatabaseEncoding() == PG_UTF8) { static const pg_wchar strange_letter[] = { /* @@ -944,10 +902,7 @@ p_isspecial(TParser *prs) *StopMiddle; pg_wchar c; - if (prs->pgwstr) - c = *(prs->pgwstr + prs->state->poschar); - else - c = (pg_wchar) *(prs->wstr + prs->state->poschar); + c = *(prs->pgwstr + prs->state->poschar); while (StopLow < StopHigh) { |
