From 2bfd1b1ee562c4e4fd065c7f7d1beaa9b9852070 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 21 Aug 2017 11:22:00 -0400 Subject: Don't install ICU collation keyword variants Users can still create them themselves. Instead, document Unicode TR 35 collation options for ICU, so users can create all this themselves. Reviewed-by: Peter Geoghegan --- doc/src/sgml/charset.sgml | 98 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 14 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index f2a4acc1150..44e43503a61 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -664,13 +664,6 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1; - - de-u-co-phonebk-x-icu - - German collation, phone book variant - - - de-AT-x-icu @@ -683,13 +676,6 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1; - - de-AT-u-co-phonebk-x-icu - - German collation for Austria, phone book variant - - - und-x-icu (for undefined) @@ -709,6 +695,90 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1; will draw an error along the lines of collation "de-x-icu" for encoding "WIN874" does not exist. + + + ICU allows collations to be customized beyond the basic language+country + set that is preloaded by initdb. Users are encouraged + to define their own collation objects that make use of these facilities to + suit the sorting behavior to their requirements. Here are some examples: + + + + CREATE COLLATION "de-u-co-phonebk-x-icu" (provider = icu, locale = 'de-u-co-phonebk') + + German collation with phone book collation type + + + + + CREATE COLLATION "und-u-co-emoji-x-icu" (provider = icu, locale = 'und-u-co-emoji') + + + Root collation with Emoji collation type, per Unicode Technical Standard #51 + + + + + + CREATE COLLATION digitslast (provider = icu, locale = 'en-u-kr-latn-digit') + + + Sort digits after Latin letters. (The default is digits before letters.) 
+ + + + + + CREATE COLLATION upperfirst (provider = icu, locale = 'en-u-kf-upper') + + + Sort upper-case letters before lower-case letters. (The default is + lower-case letters first.) + + + + + + CREATE COLLATION special (provider = icu, locale = 'en-u-kf-upper-kr-latn-digit') + + + Combines both of the above options. + + + + + + CREATE COLLATION numeric (provider = icu, locale = 'en-u-kn-true') + + + Numeric ordering, which sorts sequences of digits by their numeric value, + for example: A-21 < A-123 + (also known as natural sort). + + + + + + See Unicode + Technical Standard #35 + and BCP 47 for + details. The list of possible collation types (co + subtag) can be found in + the CLDR + repository. + The ICU Locale + Explorer can be used to check the details of a particular locale + definition. + + + + Note that while this system allows creating collations that "ignore + case" or "ignore accents" or similar (using + the ks key), PostgreSQL does not at the moment allow + such collations to act in a truly case- or accent-insensitive manner. Any + strings that compare equal according to the collation but are not + byte-wise equal will be sorted according to their byte values. + -- cgit v1.2.3