diff options
| author | Jeff Davis <jdavis@postgresql.org> | 2025-10-21 09:31:49 -0700 |
|---|---|---|
| committer | Jeff Davis <jdavis@postgresql.org> | 2025-10-21 09:31:49 -0700 |
| commit | e113f9c102b7e2462facf0ecffc97f8093efed54 (patch) | |
| tree | 5aa3712423242e0a045b295f3a776e4e0afb6adb /src/backend/tsearch/ts_locale.c | |
| parent | 776c2c2ae2d3ba7d9b5d7d780df67af1924e7591 (diff) | |
tsearch: use database default collation for parsing.
Previously, tsearch used the database's CTYPE setting, which only
matches the database default collation if the locale provider is libc.
Note that tsearch types (tsvector and tsquery) are not collatable
types. The locale affects parsing the original text, which is a lossy
process, so a COLLATE clause on the already-parsed value would not
make sense.
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/0151ad01239e2cc7b3139644358cf8f7b9622ff7.camel@j-davis.com
Diffstat (limited to 'src/backend/tsearch/ts_locale.c')
| -rw-r--r-- | src/backend/tsearch/ts_locale.c | 40 |
1 file changed, 14 insertions, 26 deletions
```diff
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 4801fe90089..4422f042d12 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -20,45 +20,33 @@
 static void tsearch_readline_callback(void *arg);
 
 
-/*
- * The reason these functions use a 3-wchar_t output buffer, not 2 as you
- * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
- * getting from char2wchar() is UTF16 not UTF32. A single input character
- * may therefore produce a surrogate pair rather than just one wchar_t;
- * we also need room for a trailing null. When we do get a surrogate pair,
- * we pass just the first code to iswdigit() etc, so that these functions will
- * always return false for characters outside the Basic Multilingual Plane.
- */
-#define WC_BUF_LEN 3
+/* space for a single character plus a trailing NUL */
+#define WC_BUF_LEN 2
 
 int
 t_isalpha(const char *ptr)
 {
-	int clen = pg_mblen(ptr);
-	wchar_t character[WC_BUF_LEN];
-	locale_t mylocale = 0; /* TODO */
+	pg_wchar wstr[WC_BUF_LEN];
+	int wlen pg_attribute_unused();
 
-	if (clen == 1 || database_ctype_is_c)
-		return isalpha(TOUCHAR(ptr));
+	wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
+	Assert(wlen <= 1);
 
-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
-	return iswalpha((wint_t) character[0]);
+	/* pass single character, or NUL if empty */
+	return pg_iswalpha(wstr[0], pg_database_locale());
 }
 
 int
 t_isalnum(const char *ptr)
 {
-	int clen = pg_mblen(ptr);
-	wchar_t character[WC_BUF_LEN];
-	locale_t mylocale = 0; /* TODO */
-
-	if (clen == 1 || database_ctype_is_c)
-		return isalnum(TOUCHAR(ptr));
+	pg_wchar wstr[WC_BUF_LEN];
+	int wlen pg_attribute_unused();
 
-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
+	wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
+	Assert(wlen <= 1);
 
-	return iswalnum((wint_t) character[0]);
+	/* pass single character, or NUL if empty */
+	return pg_iswalnum(wstr[0], pg_database_locale());
 }
 
 
```
