diff options
Diffstat (limited to 'src/backend')
| -rw-r--r-- | src/backend/parser/parser.c | 8 | ||||
| -rw-r--r-- | src/backend/parser/scan.l | 8 | ||||
| -rw-r--r-- | src/backend/utils/adt/jsonpath_scan.l | 6 | ||||
| -rw-r--r-- | src/backend/utils/adt/pg_locale_builtin.c | 44 | ||||
| -rw-r--r-- | src/backend/utils/adt/varlena.c | 40 | ||||
| -rw-r--r-- | src/backend/utils/mb/mbutils.c | 4 |
6 files changed, 63 insertions, 47 deletions
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index 33a040506b4..a3679f8e86c 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -339,7 +339,7 @@ hexval(unsigned char c) /* is Unicode code point acceptable? */ static void -check_unicode_value(pg_wchar c) +check_unicode_value(char32_t c) { if (!is_valid_unicode_codepoint(c)) ereport(ERROR, @@ -376,7 +376,7 @@ str_udeescape(const char *str, char escape, char *new, *out; size_t new_len; - pg_wchar pair_first = 0; + char16_t pair_first = 0; ScannerCallbackState scbstate; /* @@ -420,7 +420,7 @@ str_udeescape(const char *str, char escape, isxdigit((unsigned char) in[3]) && isxdigit((unsigned char) in[4])) { - pg_wchar unicode; + char32_t unicode; unicode = (hexval(in[1]) << 12) + (hexval(in[2]) << 8) + @@ -457,7 +457,7 @@ str_udeescape(const char *str, char escape, isxdigit((unsigned char) in[6]) && isxdigit((unsigned char) in[7])) { - pg_wchar unicode; + char32_t unicode; unicode = (hexval(in[2]) << 20) + (hexval(in[3]) << 16) + diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 08990831fe8..a67815339b7 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -121,7 +121,7 @@ static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner); static char *litbufdup(core_yyscan_t yyscanner); static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner); static int process_integer_literal(const char *token, YYSTYPE *lval, int base); -static void addunicode(pg_wchar c, yyscan_t yyscanner); +static void addunicode(char32_t c, yyscan_t yyscanner); #define yyerror(msg) scanner_yyerror(msg, yyscanner) @@ -640,7 +640,7 @@ other . addlit(yytext, yyleng, yyscanner); } <xe>{xeunicode} { - pg_wchar c = strtoul(yytext + 2, NULL, 16); + char32_t c = strtoul(yytext + 2, NULL, 16); /* * For consistency with other productions, issue any @@ -668,7 +668,7 @@ other . POP_YYLLOC(); } <xeu>{xeunicode} { - pg_wchar c = strtoul(yytext + 2, NULL, 16); + char32_t c = strtoul(yytext + 2, NULL, 16); /* Remember start of overall string token ... */ PUSH_YYLLOC(); @@ -1376,7 +1376,7 @@ process_integer_literal(const char *token, YYSTYPE *lval, int base) } static void -addunicode(pg_wchar c, core_yyscan_t yyscanner) +addunicode(char32_t c, core_yyscan_t yyscanner) { ScannerCallbackState scbstate; char buf[MAX_UNICODE_EQUIVALENT_STRING + 1]; diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index c7aab83eeb4..8c3a0a9c642 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -574,7 +574,7 @@ hexval(char c, int *result, struct Node *escontext, yyscan_t yyscanner) /* Add given unicode character to scanstring */ static bool -addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner) +addUnicodeChar(char32_t ch, struct Node *escontext, yyscan_t yyscanner) { if (ch == 0) { @@ -607,7 +607,7 @@ addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner) /* Add unicode character, processing any surrogate pairs */ static bool -addUnicode(int ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner) +addUnicode(char32_t ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner) { if (is_utf16_surrogate_first(ch)) { @@ -655,7 +655,7 @@ parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner) for (i = 2; i < l; i += 2) /* skip '\u' */ { - int ch = 0; + char32_t ch = 0; int j, si; diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index 3dc611b50e1..1021e0d129b 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -15,7 +15,6 @@ #include "catalog/pg_collation.h" #include "common/unicode_case.h" #include "common/unicode_category.h" -#include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/pg_locale.h" @@ -36,6 +35,23 @@ struct WordBoundaryState }; /* + * In UTF-8, pg_wchar is guaranteed to be the code point value. + */ +static inline char32_t +to_char32(pg_wchar wc) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + return (char32_t) wc; +} + +static inline pg_wchar +to_pg_wchar(char32_t c32) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + return (pg_wchar) c32; +} + +/* * Simple word boundary iterator that draws boundaries each time the result of * pg_u_isalnum() changes. */ @@ -47,7 +63,7 @@ initcap_wbnext(void *state) while (wbstate->offset < wbstate->len && wbstate->str[wbstate->offset] != '\0') { - pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str + + char32_t u = utf8_to_unicode((unsigned char *) wbstate->str + wbstate->offset); bool curr_alnum = pg_u_isalnum(u, wbstate->posix); @@ -112,61 +128,61 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, static bool wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_isdigit(wc, !locale->builtin.casemap_full); + return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full); } static bool wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_isalpha(wc); + return pg_u_isalpha(to_char32(wc)); } static bool wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_isalnum(wc, !locale->builtin.casemap_full); + return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full); } static bool wc_isupper_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_isupper(wc); + return pg_u_isupper(to_char32(wc)); } static bool wc_islower_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_islower(wc); + return pg_u_islower(to_char32(wc)); } static bool wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_isgraph(wc); + return pg_u_isgraph(to_char32(wc)); } static bool wc_isprint_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_isprint(wc); + return pg_u_isprint(to_char32(wc)); } static bool wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_ispunct(wc, !locale->builtin.casemap_full); + return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full); } static bool wc_isspace_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_isspace(wc); + return pg_u_isspace(to_char32(wc)); } static bool wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale) { - return pg_u_isxdigit(wc, !locale->builtin.casemap_full); + return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full); } static bool @@ -179,13 +195,13 @@ char_is_cased_builtin(char ch, pg_locale_t locale) static pg_wchar wc_toupper_builtin(pg_wchar wc, pg_locale_t locale) { - return unicode_uppercase_simple(wc); + return to_pg_wchar(unicode_uppercase_simple(to_char32(wc))); } static pg_wchar wc_tolower_builtin(pg_wchar wc, pg_locale_t locale) { - return unicode_lowercase_simple(wc); + return to_pg_wchar(unicode_lowercase_simple(to_char32(wc))); } static const struct ctype_methods ctype_methods_builtin = { diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 2c398cd9e5c..8d735786e51 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -5419,12 +5419,12 @@ unicode_assigned(PG_FUNCTION_ARGS) ereport(ERROR, (errmsg("Unicode categorization can only be performed if server encoding is UTF8"))); - /* convert to pg_wchar */ + /* convert to char32_t */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); p = (unsigned char *) VARDATA_ANY(input); for (int i = 0; i < size; i++) { - pg_wchar uchar = utf8_to_unicode(p); + char32_t uchar = utf8_to_unicode(p); int category = unicode_category(uchar); if (category == PG_U_UNASSIGNED) @@ -5443,24 +5443,24 @@ unicode_normalize_func(PG_FUNCTION_ARGS) char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); UnicodeNormalizationForm form; int size; - pg_wchar *input_chars; - pg_wchar *output_chars; + char32_t *input_chars; + char32_t *output_chars; unsigned char *p; text *result; int i; form = unicode_norm_form_from_string(formstr); - /* convert to pg_wchar */ + /* convert to char32_t */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); - input_chars = palloc((size + 1) * sizeof(pg_wchar)); + input_chars = palloc((size + 1) * sizeof(char32_t)); p = (unsigned char *) VARDATA_ANY(input); for (i = 0; i < size; i++) { input_chars[i] = utf8_to_unicode(p); p += pg_utf_mblen(p); } - input_chars[i] = (pg_wchar) '\0'; + input_chars[i] = (char32_t) '\0'; Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input)); /* action */ @@ -5468,7 +5468,7 @@ unicode_normalize_func(PG_FUNCTION_ARGS) /* convert back to UTF-8 string */ size = 0; - for (pg_wchar *wp = output_chars; *wp; wp++) + for (char32_t *wp = output_chars; *wp; wp++) { unsigned char buf[4]; @@ -5480,7 +5480,7 @@ unicode_normalize_func(PG_FUNCTION_ARGS) SET_VARSIZE(result, size + VARHDRSZ); p = (unsigned char *) VARDATA_ANY(result); - for (pg_wchar *wp = output_chars; *wp; wp++) + for (char32_t *wp = output_chars; *wp; wp++) { unicode_to_utf8(*wp, p); p += pg_utf_mblen(p); @@ -5509,8 +5509,8 @@ unicode_is_normalized(PG_FUNCTION_ARGS) char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); UnicodeNormalizationForm form; int size; - pg_wchar *input_chars; - pg_wchar *output_chars; + char32_t *input_chars; + char32_t *output_chars; unsigned char *p; int i; UnicodeNormalizationQC quickcheck; @@ -5519,16 +5519,16 @@ unicode_is_normalized(PG_FUNCTION_ARGS) form = unicode_norm_form_from_string(formstr); - /* convert to pg_wchar */ + /* convert to char32_t */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); - input_chars = palloc((size + 1) * sizeof(pg_wchar)); + input_chars = palloc((size + 1) * sizeof(char32_t)); p = (unsigned char *) VARDATA_ANY(input); for (i = 0; i < size; i++) { input_chars[i] = utf8_to_unicode(p); p += pg_utf_mblen(p); } - input_chars[i] = (pg_wchar) '\0'; + input_chars[i] = (char32_t) '\0'; Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input)); /* quick check (see UAX #15) */ @@ -5542,11 +5542,11 @@ unicode_is_normalized(PG_FUNCTION_ARGS) output_chars = unicode_normalize(form, input_chars); output_size = 0; - for (pg_wchar *wp = output_chars; *wp; wp++) + for (char32_t *wp = output_chars; *wp; wp++) output_size++; result = (size == output_size) && - (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0); + (memcmp(input_chars, output_chars, size * sizeof(char32_t)) == 0); PG_RETURN_BOOL(result); } @@ -5602,7 +5602,7 @@ unistr(PG_FUNCTION_ARGS) int len; StringInfoData str; text *result; - pg_wchar pair_first = 0; + char16_t pair_first = 0; char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; instr = VARDATA_ANY(input_text); @@ -5626,7 +5626,7 @@ unistr(PG_FUNCTION_ARGS) else if ((len >= 5 && isxdigits_n(instr + 1, 4)) || (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4))) { - pg_wchar unicode; + char32_t unicode; int offset = instr[1] == 'u' ? 2 : 1; unicode = hexval_n(instr + offset, 4); @@ -5662,7 +5662,7 @@ unistr(PG_FUNCTION_ARGS) } else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6)) { - pg_wchar unicode; + char32_t unicode; unicode = hexval_n(instr + 2, 6); @@ -5697,7 +5697,7 @@ unistr(PG_FUNCTION_ARGS) } else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8)) { - pg_wchar unicode; + char32_t unicode; unicode = hexval_n(instr + 2, 8); diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 886ecbad871..fb629ed5c8f 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -862,7 +862,7 @@ perform_default_encoding_conversion(const char *src, int len, * may call this outside any transaction, or in an aborted transaction. */ void -pg_unicode_to_server(pg_wchar c, unsigned char *s) +pg_unicode_to_server(char32_t c, unsigned char *s) { unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1]; int c_as_utf8_len; @@ -924,7 +924,7 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s) * but simply return false on conversion failure. */ bool -pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s) +pg_unicode_to_server_noerror(char32_t c, unsigned char *s) { unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1]; int c_as_utf8_len; |
