summary | refs | log | tree | commit | diff
path: root/src/common/unicode/generate-unicode_norm_table.pl
diff options
context:
space:
mode:
author: Peter Eisentraut <peter@eisentraut.org> 2020-03-24 08:49:52 +0100
committer: Peter Eisentraut <peter@eisentraut.org> 2020-03-24 10:02:46 +0100
commit: d40d564c5a920e1121b28463dfed74441cbae5c0 (patch)
tree: 7a3cc5d8353896fb5abafeee9eaf63952beb2563 /src/common/unicode/generate-unicode_norm_table.pl
parent: cedffbdb8b137325a79e07a976457bc2314adf9b (diff)
Add support for other normal forms to Unicode normalization API
It previously only supported NFKC, for use by SASLprep. This expands the API to offer the choice of all four normalization forms. Right now, there are no internal users of the forms other than NFKC.

Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Discussion: https://www.postgresql.org/message-id/flat/c1909f27-c269-2ed9-12f8-3ab72c8caf7a@2ndquadrant.com
Diffstat (limited to 'src/common/unicode/generate-unicode_norm_table.pl')
-rw-r--r-- src/common/unicode/generate-unicode_norm_table.pl | 20
1 files changed, 11 insertions, 9 deletions
diff --git a/src/common/unicode/generate-unicode_norm_table.pl b/src/common/unicode/generate-unicode_norm_table.pl
index ad995646337..cd5f502d540 100644
--- a/src/common/unicode/generate-unicode_norm_table.pl
+++ b/src/common/unicode/generate-unicode_norm_table.pl
@@ -99,10 +99,12 @@ typedef struct
#define DECOMP_NO_COMPOSE 0x80 /* don't use for re-composition */
#define DECOMP_INLINE 0x40 /* decomposition is stored inline in
* dec_index */
+#define DECOMP_COMPAT 0x20 /* compatibility mapping */
-#define DECOMPOSITION_SIZE(x) ((x)->dec_size_flags & 0x3F)
-#define DECOMPOSITION_NO_COMPOSE(x) (((x)->dec_size_flags & DECOMP_NO_COMPOSE) != 0)
+#define DECOMPOSITION_SIZE(x) ((x)->dec_size_flags & 0x1F)
+#define DECOMPOSITION_NO_COMPOSE(x) (((x)->dec_size_flags & (DECOMP_NO_COMPOSE | DECOMP_COMPAT)) != 0)
#define DECOMPOSITION_IS_INLINE(x) (((x)->dec_size_flags & DECOMP_INLINE) != 0)
+#define DECOMPOSITION_IS_COMPAT(x) (((x)->dec_size_flags & DECOMP_COMPAT) != 0)
/* Table of Unicode codepoints and their decompositions */
static const pg_unicode_decomposition UnicodeDecompMain[$num_characters] =
@@ -136,22 +138,22 @@ foreach my $char (@characters)
# Decomposition size
# Print size of decomposition
my $decomp_size = scalar(@decomp_elts);
+ die if $decomp_size > 0x1F; # to not overrun bitmask
my $first_decomp = shift @decomp_elts;
my $flags = "";
my $comment = "";
- if ($decomp_size == 2)
+ if ($compat)
{
+ $flags .= " | DECOMP_COMPAT";
+ }
+ if ($decomp_size == 2)
+ {
# Should this be used for recomposition?
- if ($compat)
- {
- $flags .= " | DECOMP_NO_COMPOSE";
- $comment = "compatibility mapping";
- }
- elsif ($character_hash{$first_decomp}
+ if ($character_hash{$first_decomp}
&& $character_hash{$first_decomp}->{class} != 0)
{
$flags .= " | DECOMP_NO_COMPOSE";