diff options
author | Peter Eisentraut <peter@eisentraut.org> | 2020-03-24 08:49:52 +0100 |
---|---|---|
committer | Peter Eisentraut <peter@eisentraut.org> | 2020-03-24 10:02:46 +0100 |
commit | d40d564c5a920e1121b28463dfed74441cbae5c0 (patch) | |
tree | 7a3cc5d8353896fb5abafeee9eaf63952beb2563 /src/common/unicode/generate-unicode_norm_table.pl | |
parent | cedffbdb8b137325a79e07a976457bc2314adf9b (diff) |
Add support for other normal forms to Unicode normalization API
It previously only supported NFKC, for use by SASLprep. This expands
the API to offer the choice of all four normalization forms. Right
now, there are no internal users of the forms other than NFKC.
Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Discussion: https://www.postgresql.org/message-id/flat/c1909f27-c269-2ed9-12f8-3ab72c8caf7a@2ndquadrant.com
Diffstat (limited to 'src/common/unicode/generate-unicode_norm_table.pl')
-rw-r--r-- | src/common/unicode/generate-unicode_norm_table.pl | 20 |
1 files changed, 11 insertions, 9 deletions
```diff
diff --git a/src/common/unicode/generate-unicode_norm_table.pl b/src/common/unicode/generate-unicode_norm_table.pl
index ad995646337..cd5f502d540 100644
--- a/src/common/unicode/generate-unicode_norm_table.pl
+++ b/src/common/unicode/generate-unicode_norm_table.pl
@@ -99,10 +99,12 @@ typedef struct
 #define DECOMP_NO_COMPOSE 0x80 /* don't use for re-composition */
 #define DECOMP_INLINE 0x40 /* decomposition is stored inline in
 							* dec_index */
+#define DECOMP_COMPAT 0x20 /* compatibility mapping */
 
-#define DECOMPOSITION_SIZE(x) ((x)->dec_size_flags & 0x3F)
-#define DECOMPOSITION_NO_COMPOSE(x) (((x)->dec_size_flags & DECOMP_NO_COMPOSE) != 0)
+#define DECOMPOSITION_SIZE(x) ((x)->dec_size_flags & 0x1F)
+#define DECOMPOSITION_NO_COMPOSE(x) (((x)->dec_size_flags & (DECOMP_NO_COMPOSE | DECOMP_COMPAT)) != 0)
 #define DECOMPOSITION_IS_INLINE(x) (((x)->dec_size_flags & DECOMP_INLINE) != 0)
+#define DECOMPOSITION_IS_COMPAT(x) (((x)->dec_size_flags & DECOMP_COMPAT) != 0)
 
 /* Table of Unicode codepoints and their decompositions */
 static const pg_unicode_decomposition UnicodeDecompMain[$num_characters] =
@@ -136,22 +138,22 @@ foreach my $char (@characters)
 	# Decomposition size
 	# Print size of decomposition
 	my $decomp_size = scalar(@decomp_elts);
+	die if $decomp_size > 0x1F; # to not overrun bitmask
 
 	my $first_decomp = shift @decomp_elts;
 
 	my $flags = "";
 	my $comment = "";
 
-	if ($decomp_size == 2)
+	if ($compat)
 	{
+		$flags .= " | DECOMP_COMPAT";
+	}
 
+	if ($decomp_size == 2)
+	{
 		# Should this be used for recomposition?
-		if ($compat)
-		{
-			$flags .= " | DECOMP_NO_COMPOSE";
-			$comment = "compatibility mapping";
-		}
-		elsif ($character_hash{$first_decomp}
+		if ($character_hash{$first_decomp}
 			&& $character_hash{$first_decomp}->{class} != 0)
 		{
 			$flags .= " | DECOMP_NO_COMPOSE";
```