summaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/pg_locale_builtin.c
diff options
context:
space:
mode:
authorJeff Davis <jdavis@postgresql.org>2025-10-29 14:17:13 -0700
committerJeff Davis <jdavis@postgresql.org>2025-10-29 14:17:13 -0700
commit3853a6956c3e3bc7a6fa9bcdb205a2997f46bac2 (patch)
treed53058de062edbdbae7fadccf5f40b11147d2293 /src/backend/utils/adt/pg_locale_builtin.c
parent16edc1b94fc2db6e6a376471e280b50a418907c2 (diff)
Use C11 char16_t and char32_t for Unicode code points.
Reviewed-by: Tatsuo Ishii <ishii@postgresql.org> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Peter Eisentraut <peter@eisentraut.org> Discussion: https://postgr.es/m/bedcc93d06203dfd89815b10f815ca2de8626e85.camel%40j-davis.com
Diffstat (limited to 'src/backend/utils/adt/pg_locale_builtin.c')
-rw-r--r--src/backend/utils/adt/pg_locale_builtin.c44
1 files changed, 30 insertions, 14 deletions
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 3dc611b50e1..1021e0d129b 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -15,7 +15,6 @@
#include "catalog/pg_collation.h"
#include "common/unicode_case.h"
#include "common/unicode_category.h"
-#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
@@ -36,6 +35,23 @@ struct WordBoundaryState
};
/*
+ * In UTF-8, pg_wchar is guaranteed to be the code point value.
+ */
+static inline char32_t
+to_char32(pg_wchar wc)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+ return (char32_t) wc;
+}
+
+static inline pg_wchar
+to_pg_wchar(char32_t c32)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+ return (pg_wchar) c32;
+}
+
+/*
* Simple word boundary iterator that draws boundaries each time the result of
* pg_u_isalnum() changes.
*/
@@ -47,7 +63,7 @@ initcap_wbnext(void *state)
while (wbstate->offset < wbstate->len &&
wbstate->str[wbstate->offset] != '\0')
{
- pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
+ char32_t u = utf8_to_unicode((unsigned char *) wbstate->str +
wbstate->offset);
bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
@@ -112,61 +128,61 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
static bool
wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_isdigit(wc, !locale->builtin.casemap_full);
+ return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
}
static bool
wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_isalpha(wc);
+ return pg_u_isalpha(to_char32(wc));
}
static bool
wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_isalnum(wc, !locale->builtin.casemap_full);
+ return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
}
static bool
wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_isupper(wc);
+ return pg_u_isupper(to_char32(wc));
}
static bool
wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_islower(wc);
+ return pg_u_islower(to_char32(wc));
}
static bool
wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_isgraph(wc);
+ return pg_u_isgraph(to_char32(wc));
}
static bool
wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_isprint(wc);
+ return pg_u_isprint(to_char32(wc));
}
static bool
wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_ispunct(wc, !locale->builtin.casemap_full);
+ return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
}
static bool
wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_isspace(wc);
+ return pg_u_isspace(to_char32(wc));
}
static bool
wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
{
- return pg_u_isxdigit(wc, !locale->builtin.casemap_full);
+ return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
}
static bool
@@ -179,13 +195,13 @@ char_is_cased_builtin(char ch, pg_locale_t locale)
static pg_wchar
wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
{
- return unicode_uppercase_simple(wc);
+ return to_pg_wchar(unicode_uppercase_simple(to_char32(wc)));
}
static pg_wchar
wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
{
- return unicode_lowercase_simple(wc);
+ return to_pg_wchar(unicode_lowercase_simple(to_char32(wc)));
}
static const struct ctype_methods ctype_methods_builtin = {