summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/tsearch/ts_locale.c40
-rw-r--r--src/backend/tsearch/wparser_def.c71
2 files changed, 27 insertions, 84 deletions
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 4801fe90089..4422f042d12 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -20,45 +20,33 @@
static void tsearch_readline_callback(void *arg);
-/*
- * The reason these functions use a 3-wchar_t output buffer, not 2 as you
- * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
- * getting from char2wchar() is UTF16 not UTF32. A single input character
- * may therefore produce a surrogate pair rather than just one wchar_t;
- * we also need room for a trailing null. When we do get a surrogate pair,
- * we pass just the first code to iswdigit() etc, so that these functions will
- * always return false for characters outside the Basic Multilingual Plane.
- */
-#define WC_BUF_LEN 3
+/* space for a single character plus a trailing NUL */
+#define WC_BUF_LEN 2
int
t_isalpha(const char *ptr)
{
- int clen = pg_mblen(ptr);
- wchar_t character[WC_BUF_LEN];
- locale_t mylocale = 0; /* TODO */
+ pg_wchar wstr[WC_BUF_LEN];
+ int wlen pg_attribute_unused();
- if (clen == 1 || database_ctype_is_c)
- return isalpha(TOUCHAR(ptr));
+ wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
+ Assert(wlen <= 1);
- char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
- return iswalpha((wint_t) character[0]);
+ /* pass single character, or NUL if empty */
+ return pg_iswalpha(wstr[0], pg_database_locale());
}
int
t_isalnum(const char *ptr)
{
- int clen = pg_mblen(ptr);
- wchar_t character[WC_BUF_LEN];
- locale_t mylocale = 0; /* TODO */
-
- if (clen == 1 || database_ctype_is_c)
- return isalnum(TOUCHAR(ptr));
+ pg_wchar wstr[WC_BUF_LEN];
+ int wlen pg_attribute_unused();
- char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
+ wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
+ Assert(wlen <= 1);
- return iswalnum((wint_t) character[0]);
+ /* pass single character, or NUL if empty */
+ return pg_iswalnum(wstr[0], pg_database_locale());
}
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index e2dd3da3aa3..251a2ae6563 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -243,9 +243,7 @@ typedef struct TParser
/* string and position information */
char *str; /* multibyte string */
int lenstr; /* length of mbstring */
- wchar_t *wstr; /* wide character string */
pg_wchar *pgwstr; /* wide character string for C-locale */
- bool usewide;
/* State of parse */
int charmaxlen;
@@ -293,33 +291,8 @@ TParserInit(char *str, int len)
prs->charmaxlen = pg_database_encoding_max_length();
prs->str = str;
prs->lenstr = len;
-
- /*
- * Use wide char code only when max encoding length > 1.
- */
- if (prs->charmaxlen > 1)
- {
- locale_t mylocale = 0; /* TODO */
-
- prs->usewide = true;
- if (database_ctype_is_c)
- {
- /*
- * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
- * be different from sizeof(wchar_t)
- */
- prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
- pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
- }
- else
- {
- prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
- char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr,
- mylocale);
- }
- }
- else
- prs->usewide = false;
+ prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
+ pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
prs->state = newTParserPosition(NULL);
prs->state->state = TPS_Base;
@@ -350,12 +323,9 @@ TParserCopyInit(const TParser *orig)
prs->charmaxlen = orig->charmaxlen;
prs->str = orig->str + orig->state->posbyte;
prs->lenstr = orig->lenstr - orig->state->posbyte;
- prs->usewide = orig->usewide;
if (orig->pgwstr)
prs->pgwstr = orig->pgwstr + orig->state->poschar;
- if (orig->wstr)
- prs->wstr = orig->wstr + orig->state->poschar;
prs->state = newTParserPosition(NULL);
prs->state->state = TPS_Base;
@@ -379,8 +349,6 @@ TParserClose(TParser *prs)
prs->state = ptr;
}
- if (prs->wstr)
- pfree(prs->wstr);
if (prs->pgwstr)
pfree(prs->pgwstr);
@@ -412,13 +380,9 @@ TParserCopyClose(TParser *prs)
/*
- * Character-type support functions, equivalent to is* macros, but
- * working with any possible encodings and locales. Notes:
- * - with multibyte encoding and C-locale isw* function may fail
- * or give wrong result.
- * - multibyte encoding and C-locale often are used for
- * Asian languages.
- * - if locale is C then we use pgwstr instead of wstr.
+ * Character-type support functions using the database default locale. If the
+ * encoding is multibyte, the locale is C, and the character is non-ASCII, the
+ * result is the 'nonascii' macro argument instead of the locale's answer.
*/
#define p_iswhat(type, nonascii) \
@@ -426,19 +390,13 @@ TParserCopyClose(TParser *prs)
static int \
p_is##type(TParser *prs) \
{ \
+ pg_locale_t locale = pg_database_locale(); \
+ pg_wchar wc; \
Assert(prs->state); \
- if (prs->usewide) \
- { \
- if (prs->pgwstr) \
- { \
- unsigned int c = *(prs->pgwstr + prs->state->poschar); \
- if (c > 0x7f) \
- return nonascii; \
- return is##type(c); \
- } \
- return isw##type(*(prs->wstr + prs->state->poschar)); \
- } \
- return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
+ wc = prs->pgwstr[prs->state->poschar]; \
+ if (prs->charmaxlen > 1 && locale->ctype_is_c && wc > 0x7f) \
+ return nonascii; \
+ return pg_isw##type(wc, pg_database_locale()); \
} \
\
static int \
@@ -703,7 +661,7 @@ p_isspecial(TParser *prs)
* Check that only in utf encoding, because other encodings aren't
* supported by postgres or even exists.
*/
- if (GetDatabaseEncoding() == PG_UTF8 && prs->usewide)
+ if (GetDatabaseEncoding() == PG_UTF8)
{
static const pg_wchar strange_letter[] = {
/*
@@ -944,10 +902,7 @@ p_isspecial(TParser *prs)
*StopMiddle;
pg_wchar c;
- if (prs->pgwstr)
- c = *(prs->pgwstr + prs->state->poschar);
- else
- c = (pg_wchar) *(prs->wstr + prs->state->poschar);
+ c = *(prs->pgwstr + prs->state->poschar);
while (StopLow < StopHigh)
{