diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2005-12-22 22:50:07 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2005-12-22 22:50:07 +0000 |
commit | e6242ba176278ff02af1bb2069a32fc83c176869 (patch) | |
tree | 87b4b95b9aba3abcbd13c1947bfd8a081a71ed19 /src/backend/utils/adt/varchar.c | |
parent | f545a05704e5148fec1f373820603cc7be73e7dd (diff) |
Adjust string comparison so that only bitwise-equal strings are considered
equal: if strcoll claims two strings are equal, check it with strcmp, and
sort according to strcmp if not identical. This fixes inconsistent
behavior under glibc's hu_HU locale, and probably under some other locales
as well. Also, take advantage of the now-well-defined behavior to speed up
texteq, textne, bpchareq, bpcharne: they may as well just do a bitwise
comparison and not bother with strcoll at all.
NOTE: affected databases may need to REINDEX indexes on text columns to be
sure they are self-consistent.
Diffstat (limited to 'src/backend/utils/adt/varchar.c')
-rw-r--r-- | src/backend/utils/adt/varchar.c | 20 |
1 files changed, 14 insertions, 6 deletions
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 1377e7cc6d2..006b60ada0f 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.113 2005/10/15 02:49:30 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.113.2.1 2005/12/22 22:50:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -614,11 +614,14 @@ bpchareq(PG_FUNCTION_ARGS)
 	len1 = bcTruelen(arg1);
 	len2 = bcTruelen(arg2);
 
-	/* fast path for different-length inputs */
+	/*
+	 * Since we only care about equality or not-equality, we can avoid all
+	 * the expense of strcoll() here, and just do bitwise comparison.
+	 */
 	if (len1 != len2)
 		result = false;
 	else
-		result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) == 0);
+		result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
 
 	PG_FREE_IF_COPY(arg1, 0);
 	PG_FREE_IF_COPY(arg2, 1);
@@ -638,11 +641,14 @@ bpcharne(PG_FUNCTION_ARGS)
 	len1 = bcTruelen(arg1);
 	len2 = bcTruelen(arg2);
 
-	/* fast path for different-length inputs */
+	/*
+	 * Since we only care about equality or not-equality, we can avoid all
+	 * the expense of strcoll() here, and just do bitwise comparison.
+	 */
 	if (len1 != len2)
 		result = true;
 	else
-		result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) != 0);
+		result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
 
 	PG_FREE_IF_COPY(arg1, 0);
 	PG_FREE_IF_COPY(arg2, 1);
@@ -789,7 +795,9 @@ bpchar_smaller(PG_FUNCTION_ARGS)
  * bpchar needs a specialized hash function because we want to ignore
  * trailing blanks in comparisons.
  *
- * XXX is there any need for locale-specific behavior here?
+ * Note: currently there is no need for locale-specific behavior here,
+ * but if we ever change the semantics of bpchar comparison to trust
+ * strcoll() completely, we'd need to do something different in non-C locales.
  */
 Datum
 hashbpchar(PG_FUNCTION_ARGS)