diff options
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/c.h | 23 | ||||
| -rw-r--r-- | src/include/common/unicode_case.h | 10 | ||||
| -rw-r--r-- | src/include/common/unicode_case_table.h | 13 | ||||
| -rw-r--r-- | src/include/common/unicode_category.h | 46 | ||||
| -rw-r--r-- | src/include/common/unicode_category_table.h | 8 | ||||
| -rw-r--r-- | src/include/common/unicode_norm.h | 6 | ||||
| -rw-r--r-- | src/include/mb/pg_wchar.h | 32 | ||||
| -rw-r--r-- | src/include/pg_config.h.in | 3 |
8 files changed, 80 insertions, 61 deletions
diff --git a/src/include/c.h b/src/include/c.h index f4ec33e9b07..757dfff4782 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -1376,6 +1376,29 @@ typedef intptr_t sigjmp_buf[5]; /* /port compatibility functions */ #include "port.h" +/* + * char16_t and char32_t + * Unicode code points. + * + * uchar.h should always be available in C11, but it's not available on + * Mac. However, these types are keywords in C++11, so when using C++, we + * can't redefine the types. + * + * XXX: when uchar.h is available everywhere, we can remove this check and + * just include uchar.h unconditionally. + * + * XXX: this section is out of place because uchar.h needs to be included + * after port.h, due to an interaction with win32_port.h in some cases. + */ +#ifdef HAVE_UCHAR_H +#include <uchar.h> +#else +#ifndef __cplusplus +typedef uint16_t char16_t; +typedef uint32_t char32_t; +#endif +#endif + /* IWYU pragma: end_exports */ #endif /* C_H */ diff --git a/src/include/common/unicode_case.h b/src/include/common/unicode_case.h index 41e2c1f4b33..6bcffd349c2 100644 --- a/src/include/common/unicode_case.h +++ b/src/include/common/unicode_case.h @@ -14,14 +14,12 @@ #ifndef UNICODE_CASE_H #define UNICODE_CASE_H -#include "mb/pg_wchar.h" - typedef size_t (*WordBoundaryNext) (void *wbstate); -pg_wchar unicode_lowercase_simple(pg_wchar code); -pg_wchar unicode_titlecase_simple(pg_wchar code); -pg_wchar unicode_uppercase_simple(pg_wchar code); -pg_wchar unicode_casefold_simple(pg_wchar code); +char32_t unicode_lowercase_simple(char32_t code); +char32_t unicode_titlecase_simple(char32_t code); +char32_t unicode_uppercase_simple(char32_t code); +char32_t unicode_casefold_simple(char32_t code); size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full); size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, diff --git a/src/include/common/unicode_case_table.h b/src/include/common/unicode_case_table.h index d5311786582..0a14fb2d97b 100644 --- a/src/include/common/unicode_case_table.h +++ b/src/include/common/unicode_case_table.h @@ -18,7 +18,6 @@ */ #include "common/unicode_case.h" -#include "mb/pg_wchar.h" /* * The maximum number of codepoints that can result from case mapping @@ -45,7 +44,7 @@ typedef enum typedef struct { int16 conditions; - pg_wchar map[NCaseKind][MAX_CASE_EXPANSION]; + char32_t map[NCaseKind][MAX_CASE_EXPANSION]; } pg_special_case; /* @@ -166,7 +165,7 @@ static const pg_special_case special_case[106] = * The entry case_map_lower[case_index(codepoint)] is the mapping for the * given codepoint. */ -static const pg_wchar case_map_lower[1704] = +static const char32_t case_map_lower[1704] = { 0x000000, /* reserved */ 0x000000, /* U+000000 */ @@ -1879,7 +1878,7 @@ static const pg_wchar case_map_lower[1704] = * The entry case_map_title[case_index(codepoint)] is the mapping for the * given codepoint. */ -static const pg_wchar case_map_title[1704] = +static const char32_t case_map_title[1704] = { 0x000000, /* reserved */ 0x000000, /* U+000000 */ @@ -3592,7 +3591,7 @@ static const pg_wchar case_map_title[1704] = * The entry case_map_upper[case_index(codepoint)] is the mapping for the * given codepoint. */ -static const pg_wchar case_map_upper[1704] = +static const char32_t case_map_upper[1704] = { 0x000000, /* reserved */ 0x000000, /* U+000000 */ @@ -5305,7 +5304,7 @@ static const pg_wchar case_map_upper[1704] = * The entry case_map_fold[case_index(codepoint)] is the mapping for the * given codepoint. */ -static const pg_wchar case_map_fold[1704] = +static const char32_t case_map_fold[1704] = { 0x000000, /* reserved */ 0x000000, /* U+000000 */ @@ -13522,7 +13521,7 @@ static const uint16 case_map[4778] = * the offset into the mapping tables. */ static inline uint16 -case_index(pg_wchar cp) +case_index(char32_t cp) { /* Fast path for codepoints < 0x0588 */ if (cp < 0x0588) diff --git a/src/include/common/unicode_category.h b/src/include/common/unicode_category.h index 8fd8b67a416..684143d3c8a 100644 --- a/src/include/common/unicode_category.h +++ b/src/include/common/unicode_category.h @@ -14,8 +14,6 @@ #ifndef UNICODE_CATEGORY_H #define UNICODE_CATEGORY_H -#include "mb/pg_wchar.h" - /* * Unicode General Category Values * @@ -61,31 +59,31 @@ typedef enum pg_unicode_category PG_U_FINAL_PUNCTUATION = 29 /* Pf */ } pg_unicode_category; -extern pg_unicode_category unicode_category(pg_wchar code); +extern pg_unicode_category unicode_category(char32_t code); extern const char *unicode_category_string(pg_unicode_category category); extern const char *unicode_category_abbrev(pg_unicode_category category); -extern bool pg_u_prop_alphabetic(pg_wchar code); -extern bool pg_u_prop_lowercase(pg_wchar code); -extern bool pg_u_prop_uppercase(pg_wchar code); -extern bool pg_u_prop_cased(pg_wchar code); -extern bool pg_u_prop_case_ignorable(pg_wchar code); -extern bool pg_u_prop_white_space(pg_wchar code); -extern bool pg_u_prop_hex_digit(pg_wchar code); -extern bool pg_u_prop_join_control(pg_wchar code); +extern bool pg_u_prop_alphabetic(char32_t code); +extern bool pg_u_prop_lowercase(char32_t code); +extern bool pg_u_prop_uppercase(char32_t code); +extern bool pg_u_prop_cased(char32_t code); +extern bool pg_u_prop_case_ignorable(char32_t code); +extern bool pg_u_prop_white_space(char32_t code); +extern bool pg_u_prop_hex_digit(char32_t code); +extern bool pg_u_prop_join_control(char32_t code); -extern bool pg_u_isdigit(pg_wchar code, bool posix); -extern bool pg_u_isalpha(pg_wchar code); -extern bool pg_u_isalnum(pg_wchar code, bool posix); -extern bool pg_u_isword(pg_wchar code); -extern bool pg_u_isupper(pg_wchar code); -extern bool pg_u_islower(pg_wchar code); -extern bool pg_u_isblank(pg_wchar code); -extern bool pg_u_iscntrl(pg_wchar code); -extern bool pg_u_isgraph(pg_wchar code); -extern bool pg_u_isprint(pg_wchar code); -extern bool pg_u_ispunct(pg_wchar code, bool posix); -extern bool pg_u_isspace(pg_wchar code); -extern bool pg_u_isxdigit(pg_wchar code, bool posix); +extern bool pg_u_isdigit(char32_t code, bool posix); +extern bool pg_u_isalpha(char32_t code); +extern bool pg_u_isalnum(char32_t code, bool posix); +extern bool pg_u_isword(char32_t code); +extern bool pg_u_isupper(char32_t code); +extern bool pg_u_islower(char32_t code); +extern bool pg_u_isblank(char32_t code); +extern bool pg_u_iscntrl(char32_t code); +extern bool pg_u_isgraph(char32_t code); +extern bool pg_u_isprint(char32_t code); +extern bool pg_u_ispunct(char32_t code, bool posix); +extern bool pg_u_isspace(char32_t code); +extern bool pg_u_isxdigit(char32_t code, bool posix); #endif /* UNICODE_CATEGORY_H */ diff --git a/src/include/common/unicode_category_table.h b/src/include/common/unicode_category_table.h index 95a1c65da7e..466a41b72b0 100644 --- a/src/include/common/unicode_category_table.h +++ b/src/include/common/unicode_category_table.h @@ -20,15 +20,15 @@ */ typedef struct { - uint32 first; /* Unicode codepoint */ - uint32 last; /* Unicode codepoint */ + char32_t first; /* Unicode codepoint */ + char32_t last; /* Unicode codepoint */ uint8 category; /* General Category */ } pg_category_range; typedef struct { - uint32 first; /* Unicode codepoint */ - uint32 last; /* Unicode codepoint */ + char32_t first; /* Unicode codepoint */ + char32_t last; /* Unicode codepoint */ } pg_unicode_range; typedef struct diff --git a/src/include/common/unicode_norm.h b/src/include/common/unicode_norm.h index 5bc3b79e78e..516c192cc4c 100644 --- a/src/include/common/unicode_norm.h +++ b/src/include/common/unicode_norm.h @@ -14,8 +14,6 @@ #ifndef UNICODE_NORM_H #define UNICODE_NORM_H -#include "mb/pg_wchar.h" - typedef enum { UNICODE_NFC = 0, @@ -32,8 +30,8 @@ typedef enum UNICODE_NORM_QC_MAYBE = -1, } UnicodeNormalizationQC; -extern pg_wchar *unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input); +extern char32_t *unicode_normalize(UnicodeNormalizationForm form, const char32_t *input); -extern UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input); +extern UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const char32_t *input); #endif /* UNICODE_NORM_H */ diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 4b4a9974b75..4d84bdc81e4 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -532,25 +532,25 @@ typedef uint32 (*utf_local_conversion_func) (uint32 code); * Some handy functions for Unicode-specific tests. */ static inline bool -is_valid_unicode_codepoint(pg_wchar c) +is_valid_unicode_codepoint(char32_t c) { return (c > 0 && c <= 0x10FFFF); } static inline bool -is_utf16_surrogate_first(pg_wchar c) +is_utf16_surrogate_first(char32_t c) { return (c >= 0xD800 && c <= 0xDBFF); } static inline bool -is_utf16_surrogate_second(pg_wchar c) +is_utf16_surrogate_second(char32_t c) { return (c >= 0xDC00 && c <= 0xDFFF); } -static inline pg_wchar -surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second) +static inline char32_t +surrogate_pair_to_codepoint(char16_t first, char16_t second) { return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF); } @@ -561,20 +561,20 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second) * * No error checks here, c must point to a long-enough string. */ -static inline pg_wchar +static inline char32_t utf8_to_unicode(const unsigned char *c) { if ((*c & 0x80) == 0) - return (pg_wchar) c[0]; + return (char32_t) c[0]; else if ((*c & 0xe0) == 0xc0) - return (pg_wchar) (((c[0] & 0x1f) << 6) | + return (char32_t) (((c[0] & 0x1f) << 6) | (c[1] & 0x3f)); else if ((*c & 0xf0) == 0xe0) - return (pg_wchar) (((c[0] & 0x0f) << 12) | + return (char32_t) (((c[0] & 0x0f) << 12) | ((c[1] & 0x3f) << 6) | (c[2] & 0x3f)); else if ((*c & 0xf8) == 0xf0) - return (pg_wchar) (((c[0] & 0x07) << 18) | + return (char32_t) (((c[0] & 0x07) << 18) | ((c[1] & 0x3f) << 12) | ((c[2] & 0x3f) << 6) | (c[3] & 0x3f)); @@ -588,7 +588,7 @@ utf8_to_unicode(const unsigned char *c) * unicode_utf8len(c) bytes available. */ static inline unsigned char * -unicode_to_utf8(pg_wchar c, unsigned char *utf8string) +unicode_to_utf8(char32_t c, unsigned char *utf8string) { if (c <= 0x7F) { @@ -620,7 +620,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string) * Number of bytes needed to represent the given char in UTF8. */ static inline int -unicode_utf8len(pg_wchar c) +unicode_utf8len(char32_t c) { if (c <= 0x7F) return 1; @@ -676,8 +676,8 @@ extern int pg_valid_server_encoding(const char *name); extern bool is_encoding_supported_by_icu(int encoding); extern const char *get_encoding_name_for_icu(int encoding); -extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string); -extern pg_wchar utf8_to_unicode(const unsigned char *c); +extern unsigned char *unicode_to_utf8(char32_t c, unsigned char *utf8string); +extern char32_t utf8_to_unicode(const unsigned char *c); extern bool pg_utf8_islegal(const unsigned char *source, int length); extern int pg_utf_mblen(const unsigned char *s); extern int pg_mule_mblen(const unsigned char *s); @@ -739,8 +739,8 @@ extern char *pg_server_to_client(const char *s, int len); extern char *pg_any_to_server(const char *s, int len, int encoding); extern char *pg_server_to_any(const char *s, int len, int encoding); -extern void pg_unicode_to_server(pg_wchar c, unsigned char *s); -extern bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s); +extern void pg_unicode_to_server(char32_t c, unsigned char *s); +extern bool pg_unicode_to_server_noerror(char32_t c, unsigned char *s); extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc); extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc); diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 08d7bfbee10..f52f14cc566 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -463,6 +463,9 @@ /* Define to 1 if you have the <termios.h> header file. */ #undef HAVE_TERMIOS_H +/* Define to 1 if you have the <uchar.h> header file. */ +#undef HAVE_UCHAR_H + /* Define to 1 if curl_global_init() is guaranteed to be thread-safe. */ #undef HAVE_THREADSAFE_CURL_GLOBAL_INIT |
