diff options
author | Peter Eisentraut <peter@eisentraut.org> | 2024-11-15 14:52:28 +0100 |
---|---|---|
committer | Peter Eisentraut <peter@eisentraut.org> | 2024-11-15 14:55:54 +0100 |
commit | 9321d2fdf808e93095c4f86565dc8c8178675841 (patch) | |
tree | eb474d51ea45ffa135610283b4b9181722145922 /src/backend/utils/adt/ri_triggers.c | |
parent | 90bcc7c2db1d346e1153748e627c2f922b67eae7 (diff) |
Fix collation handling for foreign keys
Allowing foreign keys where the referenced and the referencing columns
have collations with different notions of equality is problematic.
This can only happen when using nondeterministic collations, for
example, if the referencing column is case-insensitive and the
referenced column is not, or vice versa. It does not happen if both
collations are deterministic.
To show one example:
CREATE COLLATION case_insensitive (provider = icu, deterministic = false, locale = 'und-u-ks-level2');
CREATE TABLE pktable (x text COLLATE "C" PRIMARY KEY);
CREATE TABLE fktable (x text COLLATE case_insensitive REFERENCES pktable ON UPDATE CASCADE ON DELETE CASCADE);
INSERT INTO pktable VALUES ('A'), ('a');
INSERT INTO fktable VALUES ('A');
BEGIN; DELETE FROM pktable WHERE x = 'a'; TABLE fktable; ROLLBACK;
BEGIN; DELETE FROM pktable WHERE x = 'A'; TABLE fktable; ROLLBACK;
Both of these DELETE statements delete the one row from fktable. So
this means that one row from fktable references two rows in pktable,
which should not happen. (That's why a primary key or unique
constraint is required on pktable.)
When nondeterministic collations were implemented, the SQL standard
available to yours truly said that referential integrity checks should
be performed with the collation of the referenced column, and so
that's how we implemented it. But this turned out to be a mistake in
the SQL standard, for the same reasons as above, that was later
(SQL:2016) fixed to require both collations to be the same. So that's
what we are aiming for here.
We don't have to be quite so strict. We can allow different
collations if they are both deterministic. This is also good for
backward compatibility.
So the new rule is that the collations either have to be the same or
both deterministic. Or in other words, if one of them is
nondeterministic, then both have to be the same.
Users upgrading from before that have affected setups will need to
make changes to their schemas (i.e., change one or both collations in
affected foreign-key relationships) before the upgrade will succeed.
Some of the nice test cases for the previous situation in
collate.icu.utf8.sql are now obsolete. They are changed to just check
the error checking of the new rule. Note that collate.sql already
contained a test for foreign keys with different deterministic
collations.
A bunch of code in ri_triggers.c that added a COLLATE clause to
enforce the referenced column's collation can be removed, because both
columns now have to have the same notion of equality, so it doesn't
matter which one to use.
Reported-by: Paul Jungwirth <pj@illuminatedcomputing.com>
Reviewed-by: Jian He <jian.universality@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/78d824e0-b21e-480d-a252-e4b84bc2c24b@illuminatedcomputing.com
Diffstat (limited to 'src/backend/utils/adt/ri_triggers.c')
-rw-r--r-- | src/backend/utils/adt/ri_triggers.c | 57 |
1 files changed, 19 insertions, 38 deletions
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 6896e1ae638..91792cb2a47 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -207,7 +207,7 @@ static void ri_BuildQueryKey(RI_QueryKey *key, int32 constr_queryno); static bool ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot, const RI_ConstraintInfo *riinfo, bool rel_is_pk); -static bool ri_CompareWithCast(Oid eq_opr, Oid typeid, +static bool ri_CompareWithCast(Oid eq_opr, Oid typeid, Oid collid, Datum lhs, Datum rhs); static void ri_InitHashTables(void); @@ -776,8 +776,6 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); - Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); - Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -786,8 +784,6 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); - if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) - ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -881,8 +877,6 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); - Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); - Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -891,8 +885,6 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); - if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) - ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -996,8 +988,6 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); - Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); - Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1009,8 +999,6 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); - if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) - ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -1232,8 +1220,6 @@ ri_set(TriggerData *trigdata, bool is_set_null, int tgkind) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); - Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); - Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1243,8 +1229,6 @@ ri_set(TriggerData *trigdata, bool is_set_null, int tgkind) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); - if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) - ri_GenerateQualCollation(&querybuf, pk_coll); qualsep = "AND"; queryoids[i] = pk_type; } @@ -1998,19 +1982,17 @@ ri_GenerateQual(StringInfo buf, /* * ri_GenerateQualCollation --- add a COLLATE spec to a WHERE clause * - * At present, we intentionally do not use this function for RI queries that - * compare a variable to a $n parameter. Since parameter symbols always have - * default collation, the effect will be to use the variable's collation. - * Now that is only strictly correct when testing the referenced column, since - * the SQL standard specifies that RI comparisons should use the referenced - * column's collation. However, so long as all collations have the same - * notion of equality (which they do, because texteq reduces to bitwise - * equality), there's no visible semantic impact from using the referencing - * column's collation when testing it, and this is a good thing to do because - * it lets us use a normal index on the referencing column. However, we do - * have to use this function when directly comparing the referencing and - * referenced columns, if they are of different collations; else the parser - * will fail to resolve the collation to use. + * We only have to use this function when directly comparing the referencing + * and referenced columns, if they are of different collations; else the + * parser will fail to resolve the collation to use. We don't need to use + * this function for RI queries that compare a variable to a $n parameter. + * Since parameter symbols always have default collation, the effect will be + * to use the variable's collation. + * + * Note that we require that the collations of the referencing and the + * referenced column have the same notion of equality: Either they have to + * both be deterministic or else they both have to be the same. (See also + * ATAddForeignKeyConstraint().) */ static void ri_GenerateQualCollation(StringInfo buf, Oid collation) @@ -2952,7 +2934,7 @@ ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot, * operator. Changes that compare equal will still satisfy the * constraint after the update. */ - if (!ri_CompareWithCast(eq_opr, RIAttType(rel, attnums[i]), + if (!ri_CompareWithCast(eq_opr, RIAttType(rel, attnums[i]), RIAttCollation(rel, attnums[i]), newvalue, oldvalue)) return false; } @@ -2968,11 +2950,12 @@ ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot, * Call the appropriate comparison operator for two values. * Normally this is equality, but for the PERIOD part of foreign keys * it is ContainedBy, so the order of lhs vs rhs is significant. + * See below for how the collation is applied. * * NB: we have already checked that neither value is null. */ static bool -ri_CompareWithCast(Oid eq_opr, Oid typeid, +ri_CompareWithCast(Oid eq_opr, Oid typeid, Oid collid, Datum lhs, Datum rhs) { RI_CompareHashEntry *entry = ri_HashCompareOp(eq_opr, typeid); @@ -2998,9 +2981,9 @@ ri_CompareWithCast(Oid eq_opr, Oid typeid, * on the other side of a foreign-key constraint. Therefore, the * comparison here would need to be done with the collation of the *other* * table. For simplicity (e.g., we might not even have the other table - * open), we'll just use the default collation here, which could lead to - * some false negatives. All this would break if we ever allow - * database-wide collations to be nondeterministic. + * open), we'll use our own collation. This is fine because we require + * that both collations have the same notion of equality (either they are + * both deterministic or else they are both the same). * * With range/multirangetypes, the collation of the base type is stored as * part of the rangetype (pg_range.rngcollation), and always used, so @@ -3008,9 +2991,7 @@ ri_CompareWithCast(Oid eq_opr, Oid typeid, * But if we support arbitrary types with PERIOD, we should perhaps just * always force a re-check. */ - return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo, - DEFAULT_COLLATION_OID, - lhs, rhs)); + return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo, collid, lhs, rhs)); } /* |