summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Eisentraut <peter_e@gmx.net>2017-08-21 11:22:00 -0400
committerPeter Eisentraut <peter_e@gmx.net>2017-08-21 19:21:14 -0400
commit958ffb8c286d93d1bfced17e6300d13f9634b431 (patch)
treeee8b8c141ef3a3f6c9a2bea83359c5e5324177e5
parenta79fb8e0c452a9b88206e2abd4add2b432a2596b (diff)
Don't install ICU collation keyword variants
Users can still create them themselves. Instead, document Unicode TR 35 collation options for ICU, so users can create all this themselves. Reviewed-by: Peter Geoghegan <pg@bowt.ie>
-rw-r--r--doc/src/sgml/charset.sgml98
-rw-r--r--src/backend/commands/collationcmds.c71
2 files changed, 84 insertions, 85 deletions
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index f2a4acc1150..44e43503a61 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -665,13 +665,6 @@ SELECT a COLLATE "C" &lt; b COLLATE "POSIX" FROM test1;
</varlistentry>
<varlistentry>
- <term><literal>de-u-co-phonebk-x-icu</literal></term>
- <listitem>
- <para>German collation, phone book variant</para>
- </listitem>
- </varlistentry>
-
- <varlistentry>
<term><literal>de-AT-x-icu</literal></term>
<listitem>
<para>German collation for Austria, default variant</para>
@@ -684,13 +677,6 @@ SELECT a COLLATE "C" &lt; b COLLATE "POSIX" FROM test1;
</varlistentry>
<varlistentry>
- <term><literal>de-AT-u-co-phonebk-x-icu</literal></term>
- <listitem>
- <para>German collation for Austria, phone book variant</para>
- </listitem>
- </varlistentry>
-
- <varlistentry>
<term><literal>und-x-icu</literal> (for <quote>undefined</quote>)</term>
<listitem>
<para>
@@ -709,6 +695,90 @@ SELECT a COLLATE "C" &lt; b COLLATE "POSIX" FROM test1;
will draw an error along the lines of <quote>collation "de-x-icu" for
encoding "WIN874" does not exist</>.
</para>
+
+ <para>
+ ICU allows collations to be customized beyond the basic language+country
+ set that is preloaded by <command>initdb</command>. Users are encouraged
+ to define their own collation objects that make use of these facilities to
+ suit the sorting behavior to their requirements. Here are some examples:
+
+ <variablelist>
+ <varlistentry>
+ <term><literal>CREATE COLLATION "de-u-co-phonebk-x-icu" (provider = icu, locale = 'de-u-co-phonebk')</literal></term>
+ <listitem>
+ <para>German collation with phone book collation type</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>CREATE COLLATION "und-u-co-emoji-x-icu" (provider = icu, locale = 'und-u-co-emoji')</literal></term>
+ <listitem>
+ <para>
+ Root collation with Emoji collation type, per Unicode Technical Standard #51
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>CREATE COLLATION digitslast (provider = icu, locale = 'en-u-kr-latn-digit')</literal></term>
+ <listitem>
+ <para>
+ Sort digits after Latin letters. (The default is digits before letters.)
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>CREATE COLLATION upperfirst (provider = icu, locale = 'en-u-kf-upper')</literal></term>
+ <listitem>
+ <para>
+ Sort upper-case letters before lower-case letters. (The default is
+ lower-case letters first.)
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>CREATE COLLATION special (provider = icu, locale = 'en-u-kf-upper-kr-latn-digit')</literal></term>
+ <listitem>
+ <para>
+ Combines both of the above options.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>CREATE COLLATION numeric (provider = icu, locale = 'en-u-kn-true')</literal></term>
+ <listitem>
+ <para>
+ Numeric ordering, sorts sequences of digits by their numeric value,
+ for example: <literal>A-21</literal> &lt; <literal>A-123</literal>
+ (also known as natural sort).
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ See <ulink url="http://unicode.org/reports/tr35/tr35-collation.html">Unicode
+ Technical Standard #35</ulink>
+ and <ulink url="https://tools.ietf.org/html/bcp47">BCP 47</ulink> for
+ details. The list of possible collation types (<literal>co</literal>
+ subtag) can be found in
+ the <ulink url="http://www.unicode.org/repos/cldr/trunk/common/bcp47/collation.xml">CLDR
+ repository</ulink>.
+ The <ulink url="https://ssl.icu-project.org/icu-bin/locexp">ICU Locale
+ Explorer</ulink> can be used to check the details of a particular locale
+ definition.
+ </para>
+
+ <para>
+ Note that while this system allows creating collations that <quote>ignore
+ case</quote> or <quote>ignore accents</quote> or similar (using
+ the <literal>ks</literal> key), PostgreSQL does not at the moment allow
+ such collations to act in a truly case- or accent-insensitive manner. Any
+ strings that compare equal according to the collation but are not
+ byte-wise equal will be sorted according to their byte values.
+ </para>
</sect4>
</sect3>
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index d36ce535604..9437731276f 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -687,30 +687,11 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
*/
for (i = -1; i < uloc_countAvailable(); i++)
{
- /*
- * In ICU 4.2, ucol_getKeywordValuesForLocale() sometimes returns
- * values that will not be accepted by uloc_toLanguageTag(). Skip
- * loading keyword variants in that version. (Both
- * ucol_getKeywordValuesForLocale() and uloc_toLanguageTag() are
- * new in ICU 4.2, so older versions are not supported at all.)
- *
- * XXX We have no information about ICU 4.3 through 4.7, but we
- * know the code below works with 4.8.
- */
-#if U_ICU_VERSION_MAJOR_NUM > 4 || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM > 2)
-#define LOAD_ICU_KEYWORD_VARIANTS
-#endif
-
const char *name;
char *langtag;
char *icucomment;
const char *collcollate;
Oid collid;
-#ifdef LOAD_ICU_KEYWORD_VARIANTS
- UEnumeration *en;
- UErrorCode status;
- const char *val;
-#endif
if (i == -1)
name = ""; /* ICU root locale */
@@ -744,58 +725,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
CreateComments(collid, CollationRelationId, 0,
icucomment);
}
-
- /*
- * Add keyword variants, if enabled.
- */
-#ifdef LOAD_ICU_KEYWORD_VARIANTS
- status = U_ZERO_ERROR;
- en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
- if (U_FAILURE(status))
- ereport(ERROR,
- (errmsg("could not get keyword values for locale \"%s\": %s",
- name, u_errorName(status))));
-
- status = U_ZERO_ERROR;
- uenum_reset(en, &status);
- while ((val = uenum_next(en, NULL, &status)))
- {
- char *localeid = psprintf("%s@collation=%s", name, val);
-
- langtag = get_icu_language_tag(localeid);
- collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
-
- /*
- * Be paranoid about not allowing any non-ASCII strings into
- * pg_collation
- */
- if (!is_all_ascii(langtag) || !is_all_ascii(collcollate))
- continue;
-
- collid = CollationCreate(psprintf("%s-x-icu", langtag),
- nspid, GetUserId(),
- COLLPROVIDER_ICU, -1,
- collcollate, collcollate,
- get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
- true, true);
- if (OidIsValid(collid))
- {
- ncreated++;
-
- CommandCounterIncrement();
-
- icucomment = get_icu_locale_comment(localeid);
- if (icucomment)
- CreateComments(collid, CollationRelationId, 0,
- icucomment);
- }
- }
- if (U_FAILURE(status))
- ereport(ERROR,
- (errmsg("could not get keyword values for locale \"%s\": %s",
- name, u_errorName(status))));
- uenum_close(en);
-#endif /* LOAD_ICU_KEYWORD_VARIANTS */
}
}
#endif /* USE_ICU */