From ec48314708262d8ea6cdcb83f803fc83dd89e721 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Fri, 7 May 2021 20:17:42 +1200 Subject: Revert per-index collation version tracking feature. Design problems were discovered in the handling of composite types and record types that would cause some relevant versions not to be recorded. Misgivings were also expressed about the use of the pg_depend catalog for this purpose. We're out of time for this release so we'll revert and try again. Commits reverted: 1bf946bd: Doc: Document known problem with Windows collation versions. cf002008: Remove no-longer-relevant test case. ef387bed: Fix bogus collation-version-recording logic. 0fb0a050: Hide internal error for pg_collation_actual_version(). ff942057: Suppress "warning: variable 'collcollate' set but not used". d50e3b1f: Fix assertion in collation version lookup. f24b1569: Rethink extraction of collation dependencies. 257836a7: Track collation versions for indexes. cd6f479e: Add pg_depend.refobjversion. 7d1297df: Remove pg_collation.collversion. Discussion: https://postgr.es/m/CA%2BhUKGLhj5t1fcjqAu8iD9B3ixJtsTNqyCCD4V0aTO9kAKAjjA%40mail.gmail.com --- doc/src/sgml/catalogs.sgml | 23 ++++++------- doc/src/sgml/charset.sgml | 48 -------------------------- doc/src/sgml/func.sgml | 8 +++-- doc/src/sgml/ref/alter_collation.sgml | 63 ++++++++++++++++++++++++++++++++++ doc/src/sgml/ref/alter_index.sgml | 15 -------- doc/src/sgml/ref/create_collation.sgml | 21 ++++++++++++ doc/src/sgml/ref/pgupgrade.sgml | 15 -------- doc/src/sgml/ref/reindex.sgml | 9 ----- 8 files changed, 100 insertions(+), 102 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 492ed348b3a..29ee9605b61 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2374,6 +2374,17 @@ SCRAM-SHA-256$<iteration count>:&l LC_CTYPE for this collation object + + + + collversion text + + + Provider-specific version of the collation. 
This is recorded when the + collation is created and then checked when it is used, to detect + changes in the collation definition that could lead to data corruption. + + @@ -3317,18 +3328,6 @@ SCRAM-SHA-256$<iteration count>:&l A code defining the specific semantics of this dependency relationship; see text - - - - refobjversion text - - - An optional version for the referenced object. Currently used for - indexes' collations (see <xref linkend="collation-versions"/>). - - - - diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 1c673cc1103..98df74d0e10 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -948,54 +948,6 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr - - - Collation Versions - - - The sort order defined by a collation is not necessarily fixed over time. - PostgreSQL relies on external libraries that - are subject to operating system upgrades, and can also differ between - servers involved in binary replication and file-system-level migration. - Persistent data structures such as B-trees that depend on sort order might - be corrupted by any resulting change. - PostgreSQL defends against this by recording the - current version of each referenced collation for any index that depends on - it in the - pg_depend - catalog, if the collation provider makes that information available. If the - provider later begins to report a different version, a warning will be - issued when the index is accessed, until either the - <xref linkend="sql-reindex"/> command or the - <xref linkend="sql-alterindex"/> command is used to update the version. - - - Version information is available from the - icu provider on all operating systems. For the - libc provider, versions are currently only available - on systems using the GNU C library (most Linux systems), FreeBSD and - Windows. - - - - - When using the GNU C library for collations, the C library's version - is used as a proxy for the collation version. 
Many Linux distributions - change collation definitions only when upgrading the C library, but this - approach is imperfect as maintainers are free to back-port newer - collation definitions to older C library releases. - - - When using Windows collations, version information is only available for - collations defined with BCP 47 language tags such as - en-US. Currently, initdb selects - a default locale using a traditional Windows language and country - string such as English_United States.1252. The - --lc-collate option can be used to provide an explicit - locale name in BCP 47 format. - - - diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 0b5571460de..4d1f1794ca3 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -26547,9 +26547,11 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); Returns the actual version of the collation object as it is currently - installed in the operating system. null is returned - on operating systems where PostgreSQL - doesn't have support for versions. + installed in the operating system. If this is different from the + value in + pg_collation.collversion, + then objects depending on the collation might need to be rebuilt. See + also <xref linkend="sql-altercollation"/>. diff --git a/doc/src/sgml/ref/alter_collation.sgml b/doc/src/sgml/ref/alter_collation.sgml index 65429aabe28..af9ff2867b7 100644 --- a/doc/src/sgml/ref/alter_collation.sgml +++ b/doc/src/sgml/ref/alter_collation.sgml @@ -21,6 +21,8 @@ PostgreSQL documentation +ALTER COLLATION name REFRESH VERSION + ALTER COLLATION name RENAME TO new_name ALTER COLLATION name OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER } ALTER COLLATION name SET SCHEMA new_schema @@ -86,9 +88,70 @@ ALTER COLLATION name SET SCHEMA new_sche + + REFRESH VERSION + + + Update the collation's version. + See <xref linkend="sql-altercollation-notes"/> below. 
+ + + + + Notes + + + When using collations provided by the ICU library, the ICU-specific version + of the collator is recorded in the system catalog when the collation object + is created. When the collation is used, the current version is + checked against the recorded version, and a warning is issued when there is + a mismatch, for example: + +WARNING: collation "xx-x-icu" has version mismatch +DETAIL: The collation in the database was created using version 1.2.3.4, but the operating system provides version 2.3.4.5. +HINT: Rebuild all objects affected by this collation and run ALTER COLLATION pg_catalog."xx-x-icu" REFRESH VERSION, or build PostgreSQL with the right library version. + + A change in collation definitions can lead to corrupt indexes and other + problems because the database system relies on stored objects having a + certain sort order. Generally, this should be avoided, but it can happen + in legitimate circumstances, such as when + using pg_upgrade to upgrade to server binaries linked + with a newer version of ICU. When this happens, all objects depending on + the collation should be rebuilt, for example, + using REINDEX. When that is done, the collation version + can be refreshed using the command ALTER COLLATION ... REFRESH + VERSION. This will update the system catalog to record the + current collator version and will make the warning go away. Note that this + does not actually check whether all affected objects have been rebuilt + correctly. + + + When using collations provided by libc and + PostgreSQL was built with the GNU C library, the + C library's version is used as a collation version. Since collation + definitions typically change only with GNU C library releases, this provides + some defense against corruption, but it is not completely reliable. + + + Currently, there is no version tracking for the database default collation. 
+ + + + The following query can be used to identify all collations in the current + database that need to be refreshed and the objects that depend on them: +<![CDATA[ +SELECT pg_describe_object(refclassid, refobjid, refobjsubid) AS "Collation", + pg_describe_object(classid, objid, objsubid) AS "Object" + FROM pg_depend d JOIN pg_collation c + ON refclassid = 'pg_collation'::regclass AND refobjid = c.oid + WHERE c.collversion <> pg_collation_actual_version(c.oid) + ORDER BY 1, 2; +]]> + + Examples diff --git a/doc/src/sgml/ref/alter_index.sgml b/doc/src/sgml/ref/alter_index.sgml index 4b446384c26..e26efec064b 100644 --- a/doc/src/sgml/ref/alter_index.sgml +++ b/doc/src/sgml/ref/alter_index.sgml @@ -25,7 +25,6 @@ ALTER INDEX [ IF EXISTS ] name RENA ALTER INDEX [ IF EXISTS ] name SET TABLESPACE tablespace_name ALTER INDEX name ATTACH PARTITION index_name ALTER INDEX name [ NO ] DEPENDS ON EXTENSION extension_name -ALTER INDEX name ALTER COLLATION collation_name REFRESH VERSION ALTER INDEX [ IF EXISTS ] name SET ( storage_parameter [= value] [, ... ] ) ALTER INDEX [ IF EXISTS ] name RESET ( storage_parameter [, ... ] ) ALTER INDEX [ IF EXISTS ] name ALTER [ COLUMN ] column_number @@ -113,20 +112,6 @@ ALTER INDEX ALL IN TABLESPACE name - - ALTER COLLATION collation_name REFRESH VERSION - - - Silences warnings about mismatched collation versions, by declaring - that the index is compatible with the current collation definition. - Be aware that incorrect use of this command can hide index corruption. - If you don't know whether a collation's definition has changed - incompatibly, <xref linkend="sql-reindex"/> is a safe alternative. - See <xref linkend="collation-versions"/> for more information. - - - - SET ( storage_parameter [= value] [, ... 
] ) diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index b97842071f9..58f5f0cd63a 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -27,6 +27,7 @@ CREATE COLLATION [ IF NOT EXISTS ] name ( [ LC_CTYPE = lc_ctype, ] [ PROVIDER = provider, ] [ DETERMINISTIC = boolean, ] + [ VERSION = version ] ) CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation @@ -148,6 +149,26 @@ CREATE COLLATION [ IF NOT EXISTS ] name FROM + + version + + + + Specifies the version string to store with the collation. Normally, + this should be omitted, which will cause the version to be computed + from the actual version of the collation as provided by the operating + system. This option is intended to be used + by pg_upgrade for copying the version from an + existing installation. + + + + See also <xref linkend="sql-altercollation"/> for how to handle + collation version mismatches. + + + + existing_collation diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 4737d97d202..a83c63cd98f 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -215,21 +215,6 @@ PostgreSQL documentation - - - - - When upgrading indexes from releases before 14 that didn't track - collation versions, pg_upgrade - assumes by default that the upgraded indexes are compatible with the - currently installed versions of relevant collations (see - <xref linkend="collation-versions"/>). Specify - --index-collation-versions-unknown to mark - them as needing to be rebuilt instead. - - - - diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml index 53c362dcd3e..e6b25ee670f 100644 --- a/doc/src/sgml/ref/reindex.sgml +++ b/doc/src/sgml/ref/reindex.sgml @@ -40,15 +40,6 @@ REINDEX [ ( option [, ...] ) ] { IN several scenarios in which to use REINDEX: - - - The index depends on the sort order of a collation, and the definition - of the collation has changed. This can cause index scans to fail to - find keys that are present. See <xref linkend="collation-versions"/> for - more information. 
- - - An index has become corrupted, and no longer contains valid -- cgit v1.2.3