summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Eisentraut <peter@eisentraut.org>2019-03-17 08:16:33 +0100
committerPeter Eisentraut <peter@eisentraut.org>2019-03-17 08:47:15 +0100
commitb8f9a2a69a279d118e366a0d3d45caa84a7620b1 (patch)
tree1567d34de216333da585621e002cdb4d7b771c14
parent042162d6281a7daf1291931ee7b0a5641d3a73d7 (diff)
Add support for collation attributes on older ICU versions
Starting in ICU 54, collation customization attributes can be specified in the locale string, for example "@colStrength=primary;colCaseLevel=yes". Add support for this for older ICU versions as well, by adding some minimal parsing of the attributes in the locale string and calling ucol_setAttribute() on them. This is essentially what never ICU versions do internally in ucol_open(). This was we can offer this functionality in a consistent way in all ICU versions supported by PostgreSQL. Also add some tests for ICU collation customization. Reported-by: Daniel Verite <daniel@manitou-mail.org> Discussion: https://www.postgresql.org/message-id/0270ebd4-f67c-8774-1a5a-91adfb9bb41f@2ndquadrant.com
-rw-r--r--src/backend/utils/adt/pg_locale.c104
-rw-r--r--src/test/regress/expected/collate.icu.utf8.out39
-rw-r--r--src/test/regress/sql/collate.icu.utf8.sql21
3 files changed, 164 insertions, 0 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 50b8b31645d..ec14bad4e34 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -58,6 +58,7 @@
#include "catalog/pg_control.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
+#include "utils/formatting.h"
#include "utils/hsearch.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -132,6 +133,9 @@ static HTAB *collation_cache = NULL;
static char *IsoLocaleName(const char *); /* MSVC specific */
#endif
+#ifdef USE_ICU
+static void icu_set_collation_attributes(UCollator *collator, const char *loc);
+#endif
/*
* pg_perm_setlocale
@@ -1380,6 +1384,9 @@ pg_newlocale_from_collation(Oid collid)
(errmsg("could not open collator for locale \"%s\": %s",
collcollate, u_errorName(status))));
+ if (U_ICU_VERSION_MAJOR_NUM < 54)
+ icu_set_collation_attributes(collator, collcollate);
+
/* We will leak this string if we get an error below :-( */
result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
collcollate);
@@ -1588,6 +1595,103 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
return len_result;
}
+/*
+ * Parse collation attributes and apply them to the open collator. This takes
+ * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
+ * applies the key-value arguments.
+ *
+ * Starting with ICU version 54, the attributes are processed automatically by
+ * ucol_open(), so this is only necessary for emulating this behavior on older
+ * versions.
+ */
+pg_attribute_unused()
+static void
+icu_set_collation_attributes(UCollator *collator, const char *loc)
+{
+ char *str = asc_tolower(loc, strlen(loc));
+
+ str = strchr(str, '@');
+ if (!str)
+ return;
+ str++;
+
+ for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
+ {
+ char *e = strchr(token, '=');
+
+ if (e)
+ {
+ char *name;
+ char *value;
+ UColAttribute uattr = -1;
+ UColAttributeValue uvalue = -1;
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+
+ *e = '\0';
+ name = token;
+ value = e + 1;
+
+ /*
+ * See attribute name and value lists in ICU i18n/coll.cpp
+ */
+ if (strcmp(name, "colstrength") == 0)
+ uattr = UCOL_STRENGTH;
+ else if (strcmp(name, "colbackwards") == 0)
+ uattr = UCOL_FRENCH_COLLATION;
+ else if (strcmp(name, "colcaselevel") == 0)
+ uattr = UCOL_CASE_LEVEL;
+ else if (strcmp(name, "colcasefirst") == 0)
+ uattr = UCOL_CASE_FIRST;
+ else if (strcmp(name, "colalternate") == 0)
+ uattr = UCOL_ALTERNATE_HANDLING;
+ else if (strcmp(name, "colnormalization") == 0)
+ uattr = UCOL_NORMALIZATION_MODE;
+ else if (strcmp(name, "colnumeric") == 0)
+ uattr = UCOL_NUMERIC_COLLATION;
+ /* ignore if unknown */
+
+ if (strcmp(value, "primary") == 0)
+ uvalue = UCOL_PRIMARY;
+ else if (strcmp(value, "secondary") == 0)
+ uvalue = UCOL_SECONDARY;
+ else if (strcmp(value, "tertiary") == 0)
+ uvalue = UCOL_TERTIARY;
+ else if (strcmp(value, "quaternary") == 0)
+ uvalue = UCOL_QUATERNARY;
+ else if (strcmp(value, "identical") == 0)
+ uvalue = UCOL_IDENTICAL;
+ else if (strcmp(value, "no") == 0)
+ uvalue = UCOL_OFF;
+ else if (strcmp(value, "yes") == 0)
+ uvalue = UCOL_ON;
+ else if (strcmp(value, "shifted") == 0)
+ uvalue = UCOL_SHIFTED;
+ else if (strcmp(value, "non-ignorable") == 0)
+ uvalue = UCOL_NON_IGNORABLE;
+ else if (strcmp(value, "lower") == 0)
+ uvalue = UCOL_LOWER_FIRST;
+ else if (strcmp(value, "upper") == 0)
+ uvalue = UCOL_UPPER_FIRST;
+ else
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+
+ if (uattr != -1 && uvalue != -1)
+ ucol_setAttribute(collator, uattr, uvalue, &status);
+
+ /*
+ * Pretend the error came from ucol_open(), for consistent error
+ * message across ICU versions.
+ */
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not open collator for locale \"%s\": %s",
+ loc, u_errorName(status))));
+ }
+ }
+}
+
#endif /* USE_ICU */
/*
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index f95d1652885..4b94921cf88 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1100,6 +1100,45 @@ select textrange_en_us('A','Z') @> 'b'::text;
drop type textrange_c;
drop type textrange_en_us;
+-- test ICU collation customization
+CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
+CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
+SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
+SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
+ERROR: could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR
-- cleanup
SET client_min_messages TO warning;
DROP SCHEMA collate_tests CASCADE;
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 0aeba3e202b..73fb1232a7d 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -425,6 +425,27 @@ drop type textrange_c;
drop type textrange_en_us;
+-- test ICU collation customization
+
+CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
+
+CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
+
+CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
+CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
+SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
+
+CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
+SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
+
+CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
+
+CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
+
+
-- cleanup
SET client_min_messages TO warning;
DROP SCHEMA collate_tests CASCADE;