8 files changed, 80 insertions, 61 deletions
diff --git a/src/include/c.h b/src/include/c.h
index f4ec33e9b07..757dfff4782 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -1376,6 +1376,29 @@ typedef intptr_t sigjmp_buf[5];
 /* /port compatibility functions */
 #include "port.h"
 
+/*
+ * char16_t and char32_t
+ *      Unicode code points.
+ *
+ * C11 specifies <uchar.h>, but some platforms (e.g. macOS) do not ship it,
+ * so fall back to our own typedefs when it is missing.  In C++ (since
+ * C++11) these names are keywords, so the fallback must be skipped there.
+ *
+ * XXX: when uchar.h is available everywhere, we can remove this check and
+ * just include uchar.h unconditionally.
+ *
+ * XXX: this section is out of place because uchar.h needs to be included
+ * after port.h, due to an interaction with win32_port.h in some cases.
+ */
+#ifdef HAVE_UCHAR_H
+#include <uchar.h>
+#else
+#ifndef __cplusplus
+typedef uint16_t char16_t;
+typedef uint32_t char32_t;
+#endif							/* !__cplusplus */
+#endif							/* HAVE_UCHAR_H */
+
 /* IWYU pragma: end_exports */
 
 #endif							/* C_H */
diff --git a/src/include/common/unicode_case.h b/src/include/common/unicode_case.h
index 41e2c1f4b33..6bcffd349c2 100644
--- a/src/include/common/unicode_case.h
+++ b/src/include/common/unicode_case.h
@@ -14,14 +14,12 @@
 #ifndef UNICODE_CASE_H
 #define UNICODE_CASE_H
 
-#include "mb/pg_wchar.h"
-
 typedef size_t (*WordBoundaryNext) (void *wbstate);
 
-pg_wchar	unicode_lowercase_simple(pg_wchar code);
-pg_wchar	unicode_titlecase_simple(pg_wchar code);
-pg_wchar	unicode_uppercase_simple(pg_wchar code);
-pg_wchar	unicode_casefold_simple(pg_wchar code);
+char32_t	unicode_lowercase_simple(char32_t code);
+char32_t	unicode_titlecase_simple(char32_t code);
+char32_t	unicode_uppercase_simple(char32_t code);
+char32_t	unicode_casefold_simple(char32_t code);
 size_t		unicode_strlower(char *dst, size_t dstsize, const char *src,
 							 ssize_t srclen, bool full);
 size_t		unicode_strtitle(char *dst, size_t dstsize, const char *src,
diff --git a/src/include/common/unicode_case_table.h b/src/include/common/unicode_case_table.h
index d5311786582..0a14fb2d97b 100644
--- a/src/include/common/unicode_case_table.h
+++ b/src/include/common/unicode_case_table.h
@@ -18,7 +18,6 @@
  */
 
 #include "common/unicode_case.h"
-#include "mb/pg_wchar.h"
 
 /*
  * The maximum number of codepoints that can result from case mapping
@@ -45,7 +44,7 @@ typedef enum
 typedef struct
 {
 	int16		conditions;
-	pg_wchar	map[NCaseKind][MAX_CASE_EXPANSION];
+	char32_t	map[NCaseKind][MAX_CASE_EXPANSION];
 } pg_special_case;
 
 /*
@@ -166,7 +165,7 @@ static const pg_special_case special_case[106] =
  * The entry case_map_lower[case_index(codepoint)] is the mapping for the
  * given codepoint.
  */
-static const pg_wchar case_map_lower[1704] =
+static const char32_t case_map_lower[1704] =
 {
 	0x000000,					/* reserved */
 	0x000000,					/* U+000000 */
@@ -1879,7 +1878,7 @@ static const pg_wchar case_map_lower[1704] =
  * The entry case_map_title[case_index(codepoint)] is the mapping for the
  * given codepoint.
  */
-static const pg_wchar case_map_title[1704] =
+static const char32_t case_map_title[1704] =
 {
 	0x000000,					/* reserved */
 	0x000000,					/* U+000000 */
@@ -3592,7 +3591,7 @@ static const pg_wchar case_map_title[1704] =
  * The entry case_map_upper[case_index(codepoint)] is the mapping for the
  * given codepoint.
  */
-static const pg_wchar case_map_upper[1704] =
+static const char32_t case_map_upper[1704] =
 {
 	0x000000,					/* reserved */
 	0x000000,					/* U+000000 */
@@ -5305,7 +5304,7 @@ static const pg_wchar case_map_upper[1704] =
  * The entry case_map_fold[case_index(codepoint)] is the mapping for the
  * given codepoint.
  */
-static const pg_wchar case_map_fold[1704] =
+static const char32_t case_map_fold[1704] =
 {
 	0x000000,					/* reserved */
 	0x000000,					/* U+000000 */
@@ -13522,7 +13521,7 @@ static const uint16 case_map[4778] =
  * the offset into the mapping tables.
  */
 static inline uint16
-case_index(pg_wchar cp)
+case_index(char32_t cp)
 {
 	/* Fast path for codepoints < 0x0588 */
 	if (cp < 0x0588)
diff --git a/src/include/common/unicode_category.h b/src/include/common/unicode_category.h
index 8fd8b67a416..684143d3c8a 100644
--- a/src/include/common/unicode_category.h
+++ b/src/include/common/unicode_category.h
@@ -14,8 +14,6 @@
 #ifndef UNICODE_CATEGORY_H
 #define UNICODE_CATEGORY_H
 
-#include "mb/pg_wchar.h"
-
 /*
  * Unicode General Category Values
  *
@@ -61,31 +59,31 @@ typedef enum pg_unicode_category
 	PG_U_FINAL_PUNCTUATION = 29 /* Pf */
 } pg_unicode_category;
 
-extern pg_unicode_category unicode_category(pg_wchar code);
+extern pg_unicode_category unicode_category(char32_t code);
 extern const char *unicode_category_string(pg_unicode_category category);
 extern const char *unicode_category_abbrev(pg_unicode_category category);
 
-extern bool pg_u_prop_alphabetic(pg_wchar code);
-extern bool pg_u_prop_lowercase(pg_wchar code);
-extern bool pg_u_prop_uppercase(pg_wchar code);
-extern bool pg_u_prop_cased(pg_wchar code);
-extern bool pg_u_prop_case_ignorable(pg_wchar code);
-extern bool pg_u_prop_white_space(pg_wchar code);
-extern bool pg_u_prop_hex_digit(pg_wchar code);
-extern bool pg_u_prop_join_control(pg_wchar code);
+extern bool pg_u_prop_alphabetic(char32_t code);
+extern bool pg_u_prop_lowercase(char32_t code);
+extern bool pg_u_prop_uppercase(char32_t code);
+extern bool pg_u_prop_cased(char32_t code);
+extern bool pg_u_prop_case_ignorable(char32_t code);
+extern bool pg_u_prop_white_space(char32_t code);
+extern bool pg_u_prop_hex_digit(char32_t code);
+extern bool pg_u_prop_join_control(char32_t code);
 
-extern bool pg_u_isdigit(pg_wchar code, bool posix);
-extern bool pg_u_isalpha(pg_wchar code);
-extern bool pg_u_isalnum(pg_wchar code, bool posix);
-extern bool pg_u_isword(pg_wchar code);
-extern bool pg_u_isupper(pg_wchar code);
-extern bool pg_u_islower(pg_wchar code);
-extern bool pg_u_isblank(pg_wchar code);
-extern bool pg_u_iscntrl(pg_wchar code);
-extern bool pg_u_isgraph(pg_wchar code);
-extern bool pg_u_isprint(pg_wchar code);
-extern bool pg_u_ispunct(pg_wchar code, bool posix);
-extern bool pg_u_isspace(pg_wchar code);
-extern bool pg_u_isxdigit(pg_wchar code, bool posix);
+extern bool pg_u_isdigit(char32_t code, bool posix);
+extern bool pg_u_isalpha(char32_t code);
+extern bool pg_u_isalnum(char32_t code, bool posix);
+extern bool pg_u_isword(char32_t code);
+extern bool pg_u_isupper(char32_t code);
+extern bool pg_u_islower(char32_t code);
+extern bool pg_u_isblank(char32_t code);
+extern bool pg_u_iscntrl(char32_t code);
+extern bool pg_u_isgraph(char32_t code);
+extern bool pg_u_isprint(char32_t code);
+extern bool pg_u_ispunct(char32_t code, bool posix);
+extern bool pg_u_isspace(char32_t code);
+extern bool pg_u_isxdigit(char32_t code, bool posix);
 
 #endif							/* UNICODE_CATEGORY_H */
diff --git a/src/include/common/unicode_category_table.h b/src/include/common/unicode_category_table.h
index 95a1c65da7e..466a41b72b0 100644
--- a/src/include/common/unicode_category_table.h
+++ b/src/include/common/unicode_category_table.h
@@ -20,15 +20,15 @@
  */
 typedef struct
 {
-	uint32		first;			/* Unicode codepoint */
-	uint32		last;			/* Unicode codepoint */
+	char32_t	first;			/* Unicode codepoint */
+	char32_t	last;			/* Unicode codepoint */
 	uint8		category;		/* General Category */
 } pg_category_range;
 
 typedef struct
 {
-	uint32		first;			/* Unicode codepoint */
-	uint32		last;			/* Unicode codepoint */
+	char32_t	first;			/* Unicode codepoint */
+	char32_t	last;			/* Unicode codepoint */
 } pg_unicode_range;
 
 typedef struct
diff --git a/src/include/common/unicode_norm.h b/src/include/common/unicode_norm.h
index 5bc3b79e78e..516c192cc4c 100644
--- a/src/include/common/unicode_norm.h
+++ b/src/include/common/unicode_norm.h
@@ -14,8 +14,6 @@
 #ifndef UNICODE_NORM_H
 #define UNICODE_NORM_H
 
-#include "mb/pg_wchar.h"
-
 typedef enum
 {
 	UNICODE_NFC = 0,
@@ -32,8 +30,8 @@ typedef enum
 	UNICODE_NORM_QC_MAYBE = -1,
 } UnicodeNormalizationQC;
 
-extern pg_wchar *unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input);
+extern char32_t *unicode_normalize(UnicodeNormalizationForm form, const char32_t *input);
 
-extern UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input);
+extern UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const char32_t *input);
 
 #endif							/* UNICODE_NORM_H */
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 4b4a9974b75..4d84bdc81e4 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -532,25 +532,25 @@ typedef uint32 (*utf_local_conversion_func) (uint32 code);
  * Some handy functions for Unicode-specific tests.
  */
 static inline bool
-is_valid_unicode_codepoint(pg_wchar c)
+is_valid_unicode_codepoint(char32_t c)
 {
 	return (c > 0 && c <= 0x10FFFF);
 }
 
 static inline bool
-is_utf16_surrogate_first(pg_wchar c)
+is_utf16_surrogate_first(char32_t c)
 {
 	return (c >= 0xD800 && c <= 0xDBFF);
 }
 
 static inline bool
-is_utf16_surrogate_second(pg_wchar c)
+is_utf16_surrogate_second(char32_t c)
 {
 	return (c >= 0xDC00 && c <= 0xDFFF);
 }
 
-static inline pg_wchar
-surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
+static inline char32_t
+surrogate_pair_to_codepoint(char16_t first, char16_t second)
 {
 	return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
 }
@@ -561,20 +561,20 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
  *
  * No error checks here, c must point to a long-enough string.
  */
-static inline pg_wchar
+static inline char32_t
 utf8_to_unicode(const unsigned char *c)
 {
 	if ((*c & 0x80) == 0)
-		return (pg_wchar) c[0];
+		return (char32_t) c[0];
 	else if ((*c & 0xe0) == 0xc0)
-		return (pg_wchar) (((c[0] & 0x1f) << 6) |
+		return (char32_t) (((c[0] & 0x1f) << 6) |
 						   (c[1] & 0x3f));
 	else if ((*c & 0xf0) == 0xe0)
-		return (pg_wchar) (((c[0] & 0x0f) << 12) |
+		return (char32_t) (((c[0] & 0x0f) << 12) |
 						   ((c[1] & 0x3f) << 6) |
 						   (c[2] & 0x3f));
 	else if ((*c & 0xf8) == 0xf0)
-		return (pg_wchar) (((c[0] & 0x07) << 18) |
+		return (char32_t) (((c[0] & 0x07) << 18) |
 						   ((c[1] & 0x3f) << 12) |
 						   ((c[2] & 0x3f) << 6) |
 						   (c[3] & 0x3f));
@@ -588,7 +588,7 @@ utf8_to_unicode(const unsigned char *c)
  * unicode_utf8len(c) bytes available.
  */
 static inline unsigned char *
-unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+unicode_to_utf8(char32_t c, unsigned char *utf8string)
 {
 	if (c <= 0x7F)
 	{
@@ -620,7 +620,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
  * Number of bytes needed to represent the given char in UTF8.
  */
 static inline int
-unicode_utf8len(pg_wchar c)
+unicode_utf8len(char32_t c)
 {
 	if (c <= 0x7F)
 		return 1;
@@ -676,8 +676,8 @@ extern int	pg_valid_server_encoding(const char *name);
 extern bool is_encoding_supported_by_icu(int encoding);
 extern const char *get_encoding_name_for_icu(int encoding);
 
-extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
-extern pg_wchar utf8_to_unicode(const unsigned char *c);
+extern unsigned char *unicode_to_utf8(char32_t c, unsigned char *utf8string);
+extern char32_t utf8_to_unicode(const unsigned char *c);
 extern bool pg_utf8_islegal(const unsigned char *source, int length);
 extern int	pg_utf_mblen(const unsigned char *s);
 extern int	pg_mule_mblen(const unsigned char *s);
@@ -739,8 +739,8 @@ extern char *pg_server_to_client(const char *s, int len);
 extern char *pg_any_to_server(const char *s, int len, int encoding);
 extern char *pg_server_to_any(const char *s, int len, int encoding);
 
-extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
-extern bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s);
+extern void pg_unicode_to_server(char32_t c, unsigned char *s);
+extern bool pg_unicode_to_server_noerror(char32_t c, unsigned char *s);
 
 extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
 extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 08d7bfbee10..f52f14cc566 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -463,6 +463,9 @@
 /* Define to 1 if you have the <termios.h> header file. */
 #undef HAVE_TERMIOS_H
 
+/* Define to 1 if you have the <uchar.h> header file. */
+#undef HAVE_UCHAR_H
+
 /* Define to 1 if curl_global_init() is guaranteed to be thread-safe. */
 #undef HAVE_THREADSAFE_CURL_GLOBAL_INIT