diff options
Diffstat (limited to 'src/common')
| -rw-r--r-- | src/common/logging.c | 4 | ||||
| -rw-r--r-- | src/common/saslprep.c | 48 | ||||
| -rw-r--r-- | src/common/string.c | 2 | ||||
| -rw-r--r-- | src/common/unicode/case_test.c | 23 | ||||
| -rw-r--r-- | src/common/unicode/category_test.c | 3 | ||||
| -rw-r--r-- | src/common/unicode/generate-norm_test_table.pl | 4 | ||||
| -rw-r--r-- | src/common/unicode/generate-unicode_case_table.pl | 7 | ||||
| -rw-r--r-- | src/common/unicode/generate-unicode_category_table.pl | 8 | ||||
| -rw-r--r-- | src/common/unicode/norm_test.c | 6 | ||||
| -rw-r--r-- | src/common/unicode_case.c | 56 | ||||
| -rw-r--r-- | src/common/unicode_category.c | 50 | ||||
| -rw-r--r-- | src/common/unicode_norm.c | 56 | 
12 files changed, 134 insertions, 133 deletions
diff --git a/src/common/logging.c b/src/common/logging.c index 125a172af80..7319a5b4e20 100644 --- a/src/common/logging.c +++ b/src/common/logging.c @@ -206,7 +206,7 @@ pg_logging_set_locus_callback(void (*cb) (const char **filename, uint64 *lineno)  void  pg_log_generic(enum pg_log_level level, enum pg_log_part part, -			   const char *pg_restrict fmt,...) +			   const char *restrict fmt,...)  {  	va_list		ap; @@ -217,7 +217,7 @@ pg_log_generic(enum pg_log_level level, enum pg_log_part part,  void  pg_log_generic_v(enum pg_log_level level, enum pg_log_part part, -				 const char *pg_restrict fmt, va_list ap) +				 const char *restrict fmt, va_list ap)  {  	int			save_errno = errno;  	const char *filename = NULL; diff --git a/src/common/saslprep.c b/src/common/saslprep.c index 97beb47940b..101e8d65a4d 100644 --- a/src/common/saslprep.c +++ b/src/common/saslprep.c @@ -47,7 +47,7 @@  /* Prototypes for local functions */  static int	codepoint_range_cmp(const void *a, const void *b); -static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize); +static bool is_code_in_table(char32_t code, const char32_t *map, int mapsize);  static int	pg_utf8_string_len(const char *source);  /* @@ -64,7 +64,7 @@ static int	pg_utf8_string_len(const char *source);   *   * These are all mapped to the ASCII space character (U+00A0).   */ -static const pg_wchar non_ascii_space_ranges[] = +static const char32_t non_ascii_space_ranges[] =  {  	0x00A0, 0x00A0,  	0x1680, 0x1680, @@ -79,7 +79,7 @@ static const pg_wchar non_ascii_space_ranges[] =   *   * If any of these appear in the input, they are removed.   */ -static const pg_wchar commonly_mapped_to_nothing_ranges[] = +static const char32_t commonly_mapped_to_nothing_ranges[] =  {  	0x00AD, 0x00AD,  	0x034F, 0x034F, @@ -114,7 +114,7 @@ static const pg_wchar commonly_mapped_to_nothing_ranges[] =   * tables, so one code might originate from multiple source tables.   * Adjacent ranges have also been merged together, to save space.   */ -static const pg_wchar prohibited_output_ranges[] = +static const char32_t prohibited_output_ranges[] =  {  	0x0000, 0x001F,				/* C.2.1 */  	0x007F, 0x00A0,				/* C.1.2, C.2.1, C.2.2 */ @@ -155,7 +155,7 @@ static const pg_wchar prohibited_output_ranges[] =  };  /* A.1 Unassigned code points in Unicode 3.2 */ -static const pg_wchar unassigned_codepoint_ranges[] = +static const char32_t unassigned_codepoint_ranges[] =  {  	0x0221, 0x0221,  	0x0234, 0x024F, @@ -556,7 +556,7 @@ static const pg_wchar unassigned_codepoint_ranges[] =  };  /* D.1 Characters with bidirectional property "R" or "AL" */ -static const pg_wchar RandALCat_codepoint_ranges[] = +static const char32_t RandALCat_codepoint_ranges[] =  {  	0x05BE, 0x05BE,  	0x05C0, 0x05C0, @@ -595,7 +595,7 @@ static const pg_wchar RandALCat_codepoint_ranges[] =  };  /* D.2 Characters with bidirectional property "L" */ -static const pg_wchar LCat_codepoint_ranges[] = +static const char32_t LCat_codepoint_ranges[] =  {  	0x0041, 0x005A,  	0x0061, 0x007A, @@ -968,8 +968,8 @@ static const pg_wchar LCat_codepoint_ranges[] =  static int  codepoint_range_cmp(const void *a, const void *b)  { -	const pg_wchar *key = (const pg_wchar *) a; -	const pg_wchar *range = (const pg_wchar *) b; +	const char32_t *key = (const char32_t *) a; +	const char32_t *range = (const char32_t *) b;  	if (*key < range[0])  		return -1;				/* less than lower bound */ @@ -980,14 +980,14 @@ codepoint_range_cmp(const void *a, const void *b)  }  static bool -is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize) +is_code_in_table(char32_t code, const char32_t *map, int mapsize)  {  	Assert(mapsize % 2 == 0);  	if (code < map[0] || code > map[mapsize - 1])  		return false; -	if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2, +	if (bsearch(&code, map, mapsize / 2, sizeof(char32_t) * 2,  				codepoint_range_cmp))  		return true;  	else @@ -1046,8 +1046,8 @@ pg_utf8_string_len(const char *source)  pg_saslprep_rc  pg_saslprep(const char *input, char **output)  { -	pg_wchar   *input_chars = NULL; -	pg_wchar   *output_chars = NULL; +	char32_t   *input_chars = NULL; +	char32_t   *output_chars = NULL;  	int			input_size;  	char	   *result;  	int			result_size; @@ -1055,7 +1055,7 @@ pg_saslprep(const char *input, char **output)  	int			i;  	bool		contains_RandALCat;  	unsigned char *p; -	pg_wchar   *wp; +	char32_t   *wp;  	/* Ensure we return *output as NULL on failure */  	*output = NULL; @@ -1080,10 +1080,10 @@ pg_saslprep(const char *input, char **output)  	input_size = pg_utf8_string_len(input);  	if (input_size < 0)  		return SASLPREP_INVALID_UTF8; -	if (input_size >= MaxAllocSize / sizeof(pg_wchar)) +	if (input_size >= MaxAllocSize / sizeof(char32_t))  		goto oom; -	input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar)); +	input_chars = ALLOC((input_size + 1) * sizeof(char32_t));  	if (!input_chars)  		goto oom; @@ -1093,7 +1093,7 @@ pg_saslprep(const char *input, char **output)  		input_chars[i] = utf8_to_unicode(p);  		p += pg_utf_mblen(p);  	} -	input_chars[i] = (pg_wchar) '\0'; +	input_chars[i] = (char32_t) '\0';  	/*  	 * The steps below correspond to the steps listed in [RFC3454], Section @@ -1107,7 +1107,7 @@ pg_saslprep(const char *input, char **output)  	count = 0;  	for (i = 0; i < input_size; i++)  	{ -		pg_wchar	code = input_chars[i]; +		char32_t	code = input_chars[i];  		if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))  			input_chars[count++] = 0x0020; @@ -1118,7 +1118,7 @@ pg_saslprep(const char *input, char **output)  		else  			input_chars[count++] = code;  	} -	input_chars[count] = (pg_wchar) '\0'; +	input_chars[count] = (char32_t) '\0';  	input_size = count;  	if (input_size == 0) @@ -1138,7 +1138,7 @@ pg_saslprep(const char *input, char **output)  	 */  	for (i = 0; i < input_size; i++)  	{ -		pg_wchar	code = input_chars[i]; +		char32_t	code = input_chars[i];  		if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))  			goto prohibited; @@ -1170,7 +1170,7 @@ pg_saslprep(const char *input, char **output)  	contains_RandALCat = false;  	for (i = 0; i < input_size; i++)  	{ -		pg_wchar	code = input_chars[i]; +		char32_t	code = input_chars[i];  		if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))  		{ @@ -1181,12 +1181,12 @@ pg_saslprep(const char *input, char **output)  	if (contains_RandALCat)  	{ -		pg_wchar	first = input_chars[0]; -		pg_wchar	last = input_chars[input_size - 1]; +		char32_t	first = input_chars[0]; +		char32_t	last = input_chars[input_size - 1];  		for (i = 0; i < input_size; i++)  		{ -			pg_wchar	code = input_chars[i]; +			char32_t	code = input_chars[i];  			if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))  				goto prohibited; diff --git a/src/common/string.c b/src/common/string.c index d8a3129c3ba..95c7c07d502 100644 --- a/src/common/string.c +++ b/src/common/string.c @@ -47,7 +47,7 @@ pg_str_endswith(const char *str, const char *end)   * strtoint --- just like strtol, but returns int not long   */  int -strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base) +strtoint(const char *restrict str, char **restrict endptr, int base)  {  	long		val; diff --git a/src/common/unicode/case_test.c b/src/common/unicode/case_test.c index fdfb62e8552..00d4f85e5a5 100644 --- a/src/common/unicode/case_test.c +++ b/src/common/unicode/case_test.c @@ -24,6 +24,7 @@  #include "common/unicode_case.h"  #include "common/unicode_category.h"  #include "common/unicode_version.h" +#include "mb/pg_wchar.h"  /* enough to hold largest source or result string, including NUL */  #define BUFSZ 256 @@ -54,7 +55,7 @@ initcap_wbnext(void *state)  	while (wbstate->offset < wbstate->len &&  		   wbstate->str[wbstate->offset] != '\0')  	{ -		pg_wchar	u = utf8_to_unicode((unsigned char *) wbstate->str + +		char32_t	u = utf8_to_unicode((unsigned char *) wbstate->str +  										wbstate->offset);  		bool		curr_alnum = pg_u_isalnum(u, wbstate->posix); @@ -77,16 +78,16 @@ initcap_wbnext(void *state)  #ifdef USE_ICU  static void -icu_test_simple(pg_wchar code) +icu_test_simple(char32_t code)  { -	pg_wchar	lower = unicode_lowercase_simple(code); -	pg_wchar	title = unicode_titlecase_simple(code); -	pg_wchar	upper = unicode_uppercase_simple(code); -	pg_wchar	fold = unicode_casefold_simple(code); -	pg_wchar	iculower = u_tolower(code); -	pg_wchar	icutitle = u_totitle(code); -	pg_wchar	icuupper = u_toupper(code); -	pg_wchar	icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT); +	char32_t	lower = unicode_lowercase_simple(code); +	char32_t	title = unicode_titlecase_simple(code); +	char32_t	upper = unicode_uppercase_simple(code); +	char32_t	fold = unicode_casefold_simple(code); +	char32_t	iculower = u_tolower(code); +	char32_t	icutitle = u_totitle(code); +	char32_t	icuupper = u_toupper(code); +	char32_t	icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);  	if (lower != iculower || title != icutitle || upper != icuupper ||  		fold != icufold) @@ -172,7 +173,7 @@ test_icu(void)  	int			successful = 0;  	int			skipped_mismatch = 0; -	for (pg_wchar code = 0; code <= 0x10ffff; code++) +	for (char32_t code = 0; code <= 0x10ffff; code++)  	{  		pg_unicode_category category = unicode_category(code); diff --git a/src/common/unicode/category_test.c b/src/common/unicode/category_test.c index 5d37ba39196..1e8c1f7905f 100644 --- a/src/common/unicode/category_test.c +++ b/src/common/unicode/category_test.c @@ -22,6 +22,7 @@  #include "common/unicode_category.h"  #include "common/unicode_version.h" +#include "mb/pg_wchar.h"  static int	pg_unicode_version = 0;  #ifdef USE_ICU @@ -59,7 +60,7 @@ icu_test()  	int			pg_skipped_codepoints = 0;  	int			icu_skipped_codepoints = 0; -	for (pg_wchar code = 0; code <= 0x10ffff; code++) +	for (char32_t code = 0; code <= 0x10ffff; code++)  	{  		uint8_t		pg_category = unicode_category(code);  		uint8_t		icu_category = u_charType(code); diff --git a/src/common/unicode/generate-norm_test_table.pl b/src/common/unicode/generate-norm_test_table.pl index 1b401be9409..1a8b908ff33 100644 --- a/src/common/unicode/generate-norm_test_table.pl +++ b/src/common/unicode/generate-norm_test_table.pl @@ -47,8 +47,8 @@ print $OUTPUT <<HEADER;  typedef struct  {  	int			linenum; -	pg_wchar	input[50]; -	pg_wchar	output[4][50]; +	char32_t	input[50]; +	char32_t	output[4][50];  } pg_unicode_test;  /* test table */ diff --git a/src/common/unicode/generate-unicode_case_table.pl b/src/common/unicode/generate-unicode_case_table.pl index 5d9ddd62803..f71eb25c94e 100644 --- a/src/common/unicode/generate-unicode_case_table.pl +++ b/src/common/unicode/generate-unicode_case_table.pl @@ -270,7 +270,6 @@ print $OT <<"EOS";   */  #include "common/unicode_case.h" -#include "mb/pg_wchar.h"  /*   * The maximum number of codepoints that can result from case mapping @@ -297,7 +296,7 @@ typedef enum  typedef struct  {  	int16		conditions; -	pg_wchar	map[NCaseKind][MAX_CASE_EXPANSION]; +	char32_t	map[NCaseKind][MAX_CASE_EXPANSION];  } pg_special_case;  /* @@ -430,7 +429,7 @@ foreach my $kind ('lower', 'title', 'upper', 'fold')   * The entry case_map_${kind}[case_index(codepoint)] is the mapping for the   * given codepoint.   */ -static const pg_wchar case_map_$kind\[$index\] = +static const char32_t case_map_$kind\[$index\] =  {  EOS @@ -502,7 +501,7 @@ print $OT <<"EOS";   * the offset into the mapping tables.   */  static inline uint16 -case_index(pg_wchar cp) +case_index(char32_t cp)  {  	/* Fast path for codepoints < $fastpath_limit */  	if (cp < $fastpath_limit) diff --git a/src/common/unicode/generate-unicode_category_table.pl b/src/common/unicode/generate-unicode_category_table.pl index abab5cd9696..7e094b13720 100644 --- a/src/common/unicode/generate-unicode_category_table.pl +++ b/src/common/unicode/generate-unicode_category_table.pl @@ -366,15 +366,15 @@ print $OT <<"EOS";   */  typedef struct  { -	uint32		first;			/* Unicode codepoint */ -	uint32		last;			/* Unicode codepoint */ +	char32_t	first;			/* Unicode codepoint */ +	char32_t	last;			/* Unicode codepoint */  	uint8		category;		/* General Category */  } pg_category_range;  typedef struct  { -	uint32		first;			/* Unicode codepoint */ -	uint32		last;			/* Unicode codepoint */ +	char32_t	first;			/* Unicode codepoint */ +	char32_t	last;			/* Unicode codepoint */  } pg_unicode_range;  typedef struct diff --git a/src/common/unicode/norm_test.c b/src/common/unicode/norm_test.c index 25bc59463f2..058817f1719 100644 --- a/src/common/unicode/norm_test.c +++ b/src/common/unicode/norm_test.c @@ -20,7 +20,7 @@  #include "norm_test_table.h"  static char * -print_wchar_str(const pg_wchar *s) +print_wchar_str(const char32_t *s)  {  #define BUF_DIGITS 50  	static char buf[BUF_DIGITS * 11 + 1]; @@ -41,7 +41,7 @@ print_wchar_str(const pg_wchar *s)  }  static int -pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2) +pg_wcscmp(const char32_t *s1, const char32_t *s2)  {  	for (;;)  	{ @@ -65,7 +65,7 @@ main(int argc, char **argv)  	{  		for (int form = 0; form < 4; form++)  		{ -			pg_wchar   *result; +			char32_t   *result;  			result = unicode_normalize(form, test->input); diff --git a/src/common/unicode_case.c b/src/common/unicode_case.c index 073faf6a0d5..e5e494db43c 100644 --- a/src/common/unicode_case.c +++ b/src/common/unicode_case.c @@ -30,7 +30,7 @@ enum CaseMapResult  /*   * Map for each case kind.   */ -static const pg_wchar *const casekind_map[NCaseKind] = +static const char32_t *const casekind_map[NCaseKind] =  {  	[CaseLower] = case_map_lower,  	[CaseTitle] = case_map_title, @@ -38,42 +38,42 @@ static const pg_wchar *const casekind_map[NCaseKind] =  	[CaseFold] = case_map_fold,  }; -static pg_wchar find_case_map(pg_wchar ucs, const pg_wchar *map); +static char32_t find_case_map(char32_t ucs, const char32_t *map);  static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,  						   CaseKind str_casekind, bool full, WordBoundaryNext wbnext,  						   void *wbstate); -static enum CaseMapResult casemap(pg_wchar u1, CaseKind casekind, bool full, +static enum CaseMapResult casemap(char32_t u1, CaseKind casekind, bool full,  								  const char *src, size_t srclen, size_t srcoff, -								  pg_wchar *simple, const pg_wchar **special); +								  char32_t *simple, const char32_t **special); -pg_wchar -unicode_lowercase_simple(pg_wchar code) +char32_t +unicode_lowercase_simple(char32_t code)  { -	pg_wchar	cp = find_case_map(code, case_map_lower); +	char32_t	cp = find_case_map(code, case_map_lower);  	return cp != 0 ? cp : code;  } -pg_wchar -unicode_titlecase_simple(pg_wchar code) +char32_t +unicode_titlecase_simple(char32_t code)  { -	pg_wchar	cp = find_case_map(code, case_map_title); +	char32_t	cp = find_case_map(code, case_map_title);  	return cp != 0 ? cp : code;  } -pg_wchar -unicode_uppercase_simple(pg_wchar code) +char32_t +unicode_uppercase_simple(char32_t code)  { -	pg_wchar	cp = find_case_map(code, case_map_upper); +	char32_t	cp = find_case_map(code, case_map_upper);  	return cp != 0 ? cp : code;  } -pg_wchar -unicode_casefold_simple(pg_wchar code) +char32_t +unicode_casefold_simple(char32_t code)  { -	pg_wchar	cp = find_case_map(code, case_map_fold); +	char32_t	cp = find_case_map(code, case_map_fold);  	return cp != 0 ? cp : code;  } @@ -231,10 +231,10 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,  	while ((srclen < 0 || srcoff < srclen) && src[srcoff] != '\0')  	{ -		pg_wchar	u1 = utf8_to_unicode((unsigned char *) src + srcoff); +		char32_t	u1 = utf8_to_unicode((unsigned char *) src + srcoff);  		int			u1len = unicode_utf8len(u1); -		pg_wchar	simple = 0; -		const pg_wchar *special = NULL; +		char32_t	simple = 0; +		const char32_t *special = NULL;  		enum CaseMapResult casemap_result;  		if (str_casekind == CaseTitle) @@ -265,8 +265,8 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,  			case CASEMAP_SIMPLE:  				{  					/* replace with single character */ -					pg_wchar	u2 = simple; -					pg_wchar	u2len = unicode_utf8len(u2); +					char32_t	u2 = simple; +					char32_t	u2len = unicode_utf8len(u2);  					Assert(special == NULL);  					if (result_len + u2len <= dstsize) @@ -280,7 +280,7 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,  				Assert(simple == 0);  				for (int i = 0; i < MAX_CASE_EXPANSION && special[i]; i++)  				{ -					pg_wchar	u2 = special[i]; +					char32_t	u2 = special[i];  					size_t		u2len = unicode_utf8len(u2);  					if (result_len + u2len <= dstsize) @@ -320,7 +320,7 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)  	{  		if ((str[i] & 0x80) == 0 || (str[i] & 0xC0) == 0xC0)  		{ -			pg_wchar	curr = utf8_to_unicode(str + i); +			char32_t	curr = utf8_to_unicode(str + i);  			if (pg_u_prop_case_ignorable(curr))  				continue; @@ -344,7 +344,7 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)  	{  		if ((str[i] & 0x80) == 0 || (str[i] & 0xC0) == 0xC0)  		{ -			pg_wchar	curr = utf8_to_unicode(str + i); +			char32_t	curr = utf8_to_unicode(str + i);  			if (pg_u_prop_case_ignorable(curr))  				continue; @@ -394,9 +394,9 @@ check_special_conditions(int conditions, const char *str, size_t len,   * character without modification.   */  static enum CaseMapResult -casemap(pg_wchar u1, CaseKind casekind, bool full, +casemap(char32_t u1, CaseKind casekind, bool full,  		const char *src, size_t srclen, size_t srcoff, -		pg_wchar *simple, const pg_wchar **special) +		char32_t *simple, const char32_t **special)  {  	uint16		idx; @@ -434,8 +434,8 @@ casemap(pg_wchar u1, CaseKind casekind, bool full,   * Find entry in simple case map.   * If the entry does not exist, 0 will be returned.   */ -static pg_wchar -find_case_map(pg_wchar ucs, const pg_wchar *map) +static char32_t +find_case_map(char32_t ucs, const char32_t *map)  {  	/* Fast path for codepoints < 0x80 */  	if (ucs < 0x80) diff --git a/src/common/unicode_category.c b/src/common/unicode_category.c index 4136c4d4f92..aab667a7bb4 100644 --- a/src/common/unicode_category.c +++ b/src/common/unicode_category.c @@ -1,7 +1,7 @@  /*-------------------------------------------------------------------------   * unicode_category.c   *		Determine general category and character properties of Unicode - *		characters. Encoding must be UTF8, where we assume that the pg_wchar + *		characters. Encoding must be UTF8, where we assume that the char32_t   *		representation is a code point.   *   * Portions Copyright (c) 2017-2025, PostgreSQL Global Development Group @@ -76,13 +76,13 @@  #define PG_U_CHARACTER_TAB	0x09  static bool range_search(const pg_unicode_range *tbl, size_t size, -						 pg_wchar code); +						 char32_t code);  /*   * Unicode general category for the given codepoint.   */  pg_unicode_category -unicode_category(pg_wchar code) +unicode_category(char32_t code)  {  	int			min = 0;  	int			mid; @@ -108,7 +108,7 @@ unicode_category(pg_wchar code)  }  bool -pg_u_prop_alphabetic(pg_wchar code) +pg_u_prop_alphabetic(char32_t code)  {  	if (code < 0x80)  		return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC; @@ -119,7 +119,7 @@ pg_u_prop_alphabetic(pg_wchar code)  }  bool -pg_u_prop_lowercase(pg_wchar code) +pg_u_prop_lowercase(char32_t code)  {  	if (code < 0x80)  		return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE; @@ -130,7 +130,7 @@ pg_u_prop_lowercase(pg_wchar code)  }  bool -pg_u_prop_uppercase(pg_wchar code) +pg_u_prop_uppercase(char32_t code)  {  	if (code < 0x80)  		return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE; @@ -141,7 +141,7 @@ pg_u_prop_uppercase(pg_wchar code)  }  bool -pg_u_prop_cased(pg_wchar code) +pg_u_prop_cased(char32_t code)  {  	uint32		category_mask; @@ -156,7 +156,7 @@ pg_u_prop_cased(pg_wchar code)  }  bool -pg_u_prop_case_ignorable(pg_wchar code) +pg_u_prop_case_ignorable(char32_t code)  {  	if (code < 0x80)  		return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE; @@ -167,7 +167,7 @@ pg_u_prop_case_ignorable(pg_wchar code)  }  bool -pg_u_prop_white_space(pg_wchar code) +pg_u_prop_white_space(char32_t code)  {  	if (code < 0x80)  		return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE; @@ -178,7 +178,7 @@ pg_u_prop_white_space(pg_wchar code)  }  bool -pg_u_prop_hex_digit(pg_wchar code) +pg_u_prop_hex_digit(char32_t code)  {  	if (code < 0x80)  		return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT; @@ -189,7 +189,7 @@ pg_u_prop_hex_digit(pg_wchar code)  }  bool -pg_u_prop_join_control(pg_wchar code) +pg_u_prop_join_control(char32_t code)  {  	if (code < 0x80)  		return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL; @@ -208,7 +208,7 @@ pg_u_prop_join_control(pg_wchar code)   */  bool -pg_u_isdigit(pg_wchar code, bool posix) +pg_u_isdigit(char32_t code, bool posix)  {  	if (posix)  		return ('0' <= code && code <= '9'); @@ -217,19 +217,19 @@ pg_u_isdigit(pg_wchar code, bool posix)  }  bool -pg_u_isalpha(pg_wchar code) +pg_u_isalpha(char32_t code)  {  	return pg_u_prop_alphabetic(code);  }  bool -pg_u_isalnum(pg_wchar code, bool posix) +pg_u_isalnum(char32_t code, bool posix)  {  	return pg_u_isalpha(code) || pg_u_isdigit(code, posix);  }  bool -pg_u_isword(pg_wchar code) +pg_u_isword(char32_t code)  {  	uint32		category_mask = PG_U_CATEGORY_MASK(unicode_category(code)); @@ -240,32 +240,32 @@ pg_u_isword(pg_wchar code)  }  bool -pg_u_isupper(pg_wchar code) +pg_u_isupper(char32_t code)  {  	return pg_u_prop_uppercase(code);  }  bool -pg_u_islower(pg_wchar code) +pg_u_islower(char32_t code)  {  	return pg_u_prop_lowercase(code);  }  bool -pg_u_isblank(pg_wchar code) +pg_u_isblank(char32_t code)  {  	return code == PG_U_CHARACTER_TAB ||  		unicode_category(code) == PG_U_SPACE_SEPARATOR;  }  bool -pg_u_iscntrl(pg_wchar code) +pg_u_iscntrl(char32_t code)  {  	return unicode_category(code) == PG_U_CONTROL;  }  bool -pg_u_isgraph(pg_wchar code) +pg_u_isgraph(char32_t code)  {  	uint32		category_mask = PG_U_CATEGORY_MASK(unicode_category(code)); @@ -276,7 +276,7 @@ pg_u_isgraph(pg_wchar code)  }  bool -pg_u_isprint(pg_wchar code) +pg_u_isprint(char32_t code)  {  	pg_unicode_category category = unicode_category(code); @@ -287,7 +287,7 @@ pg_u_isprint(pg_wchar code)  }  bool -pg_u_ispunct(pg_wchar code, bool posix) +pg_u_ispunct(char32_t code, bool posix)  {  	uint32		category_mask; @@ -308,13 +308,13 @@ pg_u_ispunct(pg_wchar code, bool posix)  }  bool -pg_u_isspace(pg_wchar code) +pg_u_isspace(char32_t code)  {  	return pg_u_prop_white_space(code);  }  bool -pg_u_isxdigit(pg_wchar code, bool posix) +pg_u_isxdigit(char32_t code, bool posix)  {  	if (posix)  		return (('0' <= code && code <= '9') || @@ -478,7 +478,7 @@ unicode_category_abbrev(pg_unicode_category category)   * given table.   */  static bool -range_search(const pg_unicode_range *tbl, size_t size, pg_wchar code) +range_search(const pg_unicode_range *tbl, size_t size, char32_t code)  {  	int			min = 0;  	int			mid; diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c index 6654b4cbc49..489d99cd5ab 100644 --- a/src/common/unicode_norm.c +++ b/src/common/unicode_norm.c @@ -69,7 +69,7 @@ conv_compare(const void *p1, const void *p2)   * lookup, while the frontend version uses a binary search.   */  static const pg_unicode_decomposition * -get_code_entry(pg_wchar code) +get_code_entry(char32_t code)  {  #ifndef FRONTEND  	int			h; @@ -109,7 +109,7 @@ get_code_entry(pg_wchar code)   * Get the combining class of the given codepoint.   */  static uint8 -get_canonical_class(pg_wchar code) +get_canonical_class(char32_t code)  {  	const pg_unicode_decomposition *entry = get_code_entry(code); @@ -130,15 +130,15 @@ get_canonical_class(pg_wchar code)   * Note: the returned pointer can point to statically allocated buffer, and   * is only valid until next call to this function!   */ -static const pg_wchar * +static const char32_t *  get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)  { -	static pg_wchar x; +	static char32_t x;  	if (DECOMPOSITION_IS_INLINE(entry))  	{  		Assert(DECOMPOSITION_SIZE(entry) == 1); -		x = (pg_wchar) entry->dec_index; +		x = (char32_t) entry->dec_index;  		*dec_size = 1;  		return &x;  	} @@ -156,7 +156,7 @@ get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)   * are, in turn, decomposable.   */  static int -get_decomposed_size(pg_wchar code, bool compat) +get_decomposed_size(char32_t code, bool compat)  {  	const pg_unicode_decomposition *entry;  	int			size = 0; @@ -318,7 +318,7 @@ recompose_code(uint32 start, uint32 code, uint32 *result)   * in the array result.   */  static void -decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current) +decompose_code(char32_t code, bool compat, char32_t **result, int *current)  {  	const pg_unicode_decomposition *entry;  	int			i; @@ -337,7 +337,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)  					v,  					tindex,  					sindex; -		pg_wchar   *res = *result; +		char32_t   *res = *result;  		sindex = code - SBASE;  		l = LBASE + sindex / (VCOUNT * TCOUNT); @@ -369,7 +369,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)  	if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||  		(!compat && DECOMPOSITION_IS_COMPAT(entry)))  	{ -		pg_wchar   *res = *result; +		char32_t   *res = *result;  		res[*current] = code;  		(*current)++; @@ -382,7 +382,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)  	decomp = get_code_decomposition(entry, &dec_size);  	for (i = 0; i < dec_size; i++)  	{ -		pg_wchar	lcode = (pg_wchar) decomp[i]; +		char32_t	lcode = (char32_t) decomp[i];  		/* Leave if no more decompositions */  		decompose_code(lcode, compat, result, current); @@ -398,17 +398,17 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)   * malloc. Or NULL if we run out of memory. In backend, the returned   * string is palloc'd instead, and OOM is reported with ereport().   */ -pg_wchar * -unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input) +char32_t * +unicode_normalize(UnicodeNormalizationForm form, const char32_t *input)  {  	bool		compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);  	bool		recompose = (form == UNICODE_NFC || form == UNICODE_NFKC); -	pg_wchar   *decomp_chars; -	pg_wchar   *recomp_chars; +	char32_t   *decomp_chars; +	char32_t   *recomp_chars;  	int			decomp_size,  				current_size;  	int			count; -	const pg_wchar *p; +	const char32_t *p;  	/* variables for recomposition */  	int			last_class; @@ -425,7 +425,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)  	for (p = input; *p; p++)  		decomp_size += get_decomposed_size(*p, compat); -	decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); +	decomp_chars = (char32_t *) ALLOC((decomp_size + 1) * sizeof(char32_t));  	if (decomp_chars == NULL)  		return NULL; @@ -448,9 +448,9 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)  	 */  	for (count = 1; count < decomp_size; count++)  	{ -		pg_wchar	prev = decomp_chars[count - 1]; -		pg_wchar	next = decomp_chars[count]; -		pg_wchar	tmp; +		char32_t	prev = decomp_chars[count - 1]; +		char32_t	next = decomp_chars[count]; +		char32_t	tmp;  		const uint8 prevClass = get_canonical_class(prev);  		const uint8 nextClass = get_canonical_class(next); @@ -487,7 +487,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)  	 * longer than the decomposed one, so make the allocation of the output  	 * string based on that assumption.  	 */ -	recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); +	recomp_chars = (char32_t *) ALLOC((decomp_size + 1) * sizeof(char32_t));  	if (!recomp_chars)  	{  		FREE(decomp_chars); @@ -501,9 +501,9 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)  	for (count = 1; count < decomp_size; count++)  	{ -		pg_wchar	ch = decomp_chars[count]; +		char32_t	ch = decomp_chars[count];  		int			ch_class = get_canonical_class(ch); -		pg_wchar	composite; +		char32_t	composite;  		if (last_class < ch_class &&  			recompose_code(starter_ch, ch, &composite)) @@ -524,7 +524,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)  			recomp_chars[target_pos++] = ch;  		}  	} -	recomp_chars[target_pos] = (pg_wchar) '\0'; +	recomp_chars[target_pos] = (char32_t) '\0';  	FREE(decomp_chars); @@ -540,7 +540,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)  #ifndef FRONTEND  static const pg_unicode_normprops * -qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo) +qc_hash_lookup(char32_t ch, const pg_unicode_norminfo *norminfo)  {  	int			h;  	uint32		hashkey; @@ -571,7 +571,7 @@ qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)   * Look up the normalization quick check character property   */  static UnicodeNormalizationQC -qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch) +qc_is_allowed(UnicodeNormalizationForm form, char32_t ch)  {  	const pg_unicode_normprops *found = NULL; @@ -595,7 +595,7 @@ qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)  }  UnicodeNormalizationQC -unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input) +unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const char32_t *input)  {  	uint8		lastCanonicalClass = 0;  	UnicodeNormalizationQC result = UNICODE_NORM_QC_YES; @@ -610,9 +610,9 @@ unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *  	if (form == UNICODE_NFD || form == UNICODE_NFKD)  		return UNICODE_NORM_QC_MAYBE; -	for (const pg_wchar *p = input; *p; p++) +	for (const char32_t *p = input; *p; p++)  	{ -		pg_wchar	ch = *p; +		char32_t	ch = *p;  		uint8		canonicalClass;  		UnicodeNormalizationQC check;  | 
