diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/catalog/namespace.c | 101 | ||||
-rw-r--r-- | src/backend/commands/collationcmds.c | 86 | ||||
-rw-r--r-- | src/include/catalog/catversion.h | 2 |
3 files changed, 123 insertions, 66 deletions
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 64f6feef9da..029a132bb4e 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -1915,9 +1915,60 @@ OpfamilyIsVisible(Oid opfid) } /* + * lookup_collation + * If there's a collation of the given name/namespace, and it works + * with the given encoding, return its OID. Else return InvalidOid. + */ +static Oid +lookup_collation(const char *collname, Oid collnamespace, int32 encoding) +{ + Oid collid; + HeapTuple colltup; + Form_pg_collation collform; + + /* Check for encoding-specific entry (exact match) */ + collid = GetSysCacheOid3(COLLNAMEENCNSP, + PointerGetDatum(collname), + Int32GetDatum(encoding), + ObjectIdGetDatum(collnamespace)); + if (OidIsValid(collid)) + return collid; + + /* + * Check for any-encoding entry. This takes a bit more work: while libc + * collations with collencoding = -1 do work with all encodings, ICU + * collations only work with certain encodings, so we have to check that + * aspect before deciding it's a match. + */ + colltup = SearchSysCache3(COLLNAMEENCNSP, + PointerGetDatum(collname), + Int32GetDatum(-1), + ObjectIdGetDatum(collnamespace)); + if (!HeapTupleIsValid(colltup)) + return InvalidOid; + collform = (Form_pg_collation) GETSTRUCT(colltup); + if (collform->collprovider == COLLPROVIDER_ICU) + { + if (is_encoding_supported_by_icu(encoding)) + collid = HeapTupleGetOid(colltup); + else + collid = InvalidOid; + } + else + { + collid = HeapTupleGetOid(colltup); + } + ReleaseSysCache(colltup); + return collid; +} + +/* * CollationGetCollid * Try to resolve an unqualified collation name. * Returns OID if collation found in search path, else InvalidOid. + * + * Note that this will only find collations that work with the current + * database's encoding. 
*/ Oid CollationGetCollid(const char *collname) @@ -1935,19 +1986,7 @@ CollationGetCollid(const char *collname) if (namespaceId == myTempNamespace) continue; /* do not look in temp namespace */ - /* Check for database-encoding-specific entry */ - collid = GetSysCacheOid3(COLLNAMEENCNSP, - PointerGetDatum(collname), - Int32GetDatum(dbencoding), - ObjectIdGetDatum(namespaceId)); - if (OidIsValid(collid)) - return collid; - - /* Check for any-encoding entry */ - collid = GetSysCacheOid3(COLLNAMEENCNSP, - PointerGetDatum(collname), - Int32GetDatum(-1), - ObjectIdGetDatum(namespaceId)); + collid = lookup_collation(collname, namespaceId, dbencoding); if (OidIsValid(collid)) return collid; } @@ -1961,6 +2000,9 @@ CollationGetCollid(const char *collname) * Determine whether a collation (identified by OID) is visible in the * current search path. Visible means "would be found by searching * for the unqualified collation name". + * + * Note that only collations that work with the current database's encoding + * will be considered visible. */ bool CollationIsVisible(Oid collid) @@ -1990,9 +2032,10 @@ CollationIsVisible(Oid collid) { /* * If it is in the path, it might still not be visible; it could be - * hidden by another conversion of the same name earlier in the path. - * So we must do a slow check to see if this conversion would be found - * by CollationGetCollid. + * hidden by another collation of the same name earlier in the path, + * or it might not work with the current DB encoding. So we must do a + * slow check to see if this collation would be found by + * CollationGetCollid. */ char *collname = NameStr(collform->collname); @@ -3442,6 +3485,9 @@ PopOverrideSearchPath(void) /* * get_collation_oid - find a collation by possibly qualified name + * + * Note that this will only find collations that work with the current + * database's encoding. 
*/ Oid get_collation_oid(List *name, bool missing_ok) @@ -3463,17 +3509,7 @@ get_collation_oid(List *name, bool missing_ok) if (missing_ok && !OidIsValid(namespaceId)) return InvalidOid; - /* first try for encoding-specific entry, then any-encoding */ - colloid = GetSysCacheOid3(COLLNAMEENCNSP, - PointerGetDatum(collation_name), - Int32GetDatum(dbencoding), - ObjectIdGetDatum(namespaceId)); - if (OidIsValid(colloid)) - return colloid; - colloid = GetSysCacheOid3(COLLNAMEENCNSP, - PointerGetDatum(collation_name), - Int32GetDatum(-1), - ObjectIdGetDatum(namespaceId)); + colloid = lookup_collation(collation_name, namespaceId, dbencoding); if (OidIsValid(colloid)) return colloid; } @@ -3489,16 +3525,7 @@ get_collation_oid(List *name, bool missing_ok) if (namespaceId == myTempNamespace) continue; /* do not look in temp namespace */ - colloid = GetSysCacheOid3(COLLNAMEENCNSP, - PointerGetDatum(collation_name), - Int32GetDatum(dbencoding), - ObjectIdGetDatum(namespaceId)); - if (OidIsValid(colloid)) - return colloid; - colloid = GetSysCacheOid3(COLLNAMEENCNSP, - PointerGetDatum(collation_name), - Int32GetDatum(-1), - ObjectIdGetDatum(namespaceId)); + colloid = lookup_collation(collation_name, namespaceId, dbencoding); if (OidIsValid(colloid)) return colloid; } diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 1c43f0b0ed9..7f2ce4db4c6 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -353,6 +353,21 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) } +/* + * Check a string to see if it is pure ASCII + */ +static bool +is_all_ascii(const char *str) +{ + while (*str) + { + if (IS_HIGHBIT_SET(*str)) + return false; + str++; + } + return true; +} + /* will we use "locale -a" in pg_import_system_collations? 
*/ #if defined(HAVE_LOCALE_T) && !defined(WIN32) #define READ_LOCALE_A_OUTPUT @@ -431,7 +446,9 @@ get_icu_language_tag(const char *localename) /* * Get a comment (specifically, the display name) for an ICU locale. - * The result is a palloc'd string. + * The result is a palloc'd string, or NULL if we can't get a comment + * or find that it's not all ASCII. (We can *not* accept non-ASCII + * comments, because the contents of template0 must be encoding-agnostic.) */ static char * get_icu_locale_comment(const char *localename) @@ -439,6 +456,7 @@ get_icu_locale_comment(const char *localename) UErrorCode status; UChar displayname[128]; int32 len_uchar; + int32 i; char *result; status = U_ZERO_ERROR; @@ -446,11 +464,20 @@ get_icu_locale_comment(const char *localename) displayname, lengthof(displayname), &status); if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not get display name for locale \"%s\": %s", - localename, u_errorName(status)))); + return NULL; /* no good reason to raise an error */ + + /* Check for non-ASCII comment (can't use is_all_ascii for this) */ + for (i = 0; i < len_uchar; i++) + { + if (displayname[i] > 127) + return NULL; + } - icu_from_uchar(&result, displayname, len_uchar); + /* OK, transcribe */ + result = palloc(len_uchar + 1); + for (i = 0; i < len_uchar; i++) + result[i] = displayname[i]; + result[len_uchar] = '\0'; return result; } @@ -502,7 +529,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS) { size_t len; int enc; - bool skip; char alias[NAMEDATALEN]; len = strlen(localebuf); @@ -521,16 +547,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * interpret the non-ASCII characters. We can't do much with * those, so we filter them out. 
*/ - skip = false; - for (i = 0; i < len; i++) - { - if (IS_HIGHBIT_SET(localebuf[i])) - { - skip = true; - break; - } - } - if (skip) + if (!is_all_ascii(localebuf)) { elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); continue; @@ -642,14 +659,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS) /* Load collations known to ICU */ #ifdef USE_ICU - if (!is_encoding_supported_by_icu(GetDatabaseEncoding())) - { - ereport(NOTICE, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("encoding \"%s\" not supported by ICU", - pg_encoding_to_char(GetDatabaseEncoding())))); - } - else { int i; @@ -661,6 +670,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) { const char *name; char *langtag; + char *icucomment; const char *collcollate; UEnumeration *en; UErrorCode status; @@ -674,6 +684,14 @@ pg_import_system_collations(PG_FUNCTION_ARGS) langtag = get_icu_language_tag(name); collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name; + + /* + * Be paranoid about not allowing any non-ASCII strings into + * pg_collation + */ + if (!is_all_ascii(langtag) || !is_all_ascii(collcollate)) + continue; + collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), COLLPROVIDER_ICU, -1, @@ -686,8 +704,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS) CommandCounterIncrement(); - CreateComments(collid, CollationRelationId, 0, - get_icu_locale_comment(name)); + icucomment = get_icu_locale_comment(name); + if (icucomment) + CreateComments(collid, CollationRelationId, 0, + icucomment); } /* @@ -708,6 +728,14 @@ pg_import_system_collations(PG_FUNCTION_ARGS) langtag = get_icu_language_tag(localeid); collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? 
langtag : localeid; + + /* + * Be paranoid about not allowing any non-ASCII strings into + * pg_collation + */ + if (!is_all_ascii(langtag) || !is_all_ascii(collcollate)) + continue; + collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), COLLPROVIDER_ICU, -1, @@ -720,8 +748,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS) CommandCounterIncrement(); - CreateComments(collid, CollationRelationId, 0, - get_icu_locale_comment(localeid)); + icucomment = get_icu_locale_comment(localeid); + if (icucomment) + CreateComments(collid, CollationRelationId, 0, + icucomment); } } if (U_FAILURE(status)) diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 14f916cde15..405e8e303bc 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201706231 +#define CATALOG_VERSION_NO 201706241 #endif |