summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jeff Davis <jdavis@postgresql.org> 2025-10-28 10:49:20 -0700
committer: Jeff Davis <jdavis@postgresql.org> 2025-10-28 10:49:20 -0700
commit: 4da12e9e2e3c011a3fc8354ca451d6a82c017fa3 (patch)
tree: bae1469d81725b81e206ec71e9b2538d22b1a8a2
parent: 35e53b68418a1d06f899d4bb41be88d18f9dcb7b (diff)
Move comment about casts from pg_wchar.
Suggested-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/CA+hUKGLXQUYK7Cq5KbLGgTWo7pORs7yhBWO1AEnZt7xTYbLRhg@mail.gmail.com
-rw-r--r-- src/backend/utils/adt/pg_locale_icu.c | 5
-rw-r--r-- src/backend/utils/adt/pg_locale_libc.c | 3
2 files changed, 6 insertions, 2 deletions
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 05bad202669..f5a0cc8fe41 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -128,6 +128,11 @@ char_is_cased_icu(char ch, pg_locale_t locale)
(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
}
+/*
+ * XXX: many of the functions below rely on casts directly from pg_wchar to
+ * UChar32, which is correct for the UTF-8 encoding, but not in general.
+ */
+
static pg_wchar
toupper_icu(pg_wchar wc, pg_locale_t locale)
{
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 7ae778dc296..9c7fcd1fc7a 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -45,8 +45,7 @@
*
* 2. When working in UTF8 encoding, we use the <wctype.h> functions.
* This assumes that every platform uses Unicode codepoints directly
- * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
- * even for non-UTF8 encodings, which may be a problem.) On some platforms
+ * as the wchar_t representation of Unicode. On some platforms
* wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
*
* 3. In all other encodings, we use the <ctype.h> functions for pg_wchar