diff options
author | Andrew Dunstan <andrew@dunslane.net> | 2007-09-18 17:41:17 +0000 |
---|---|---|
committer | Andrew Dunstan <andrew@dunslane.net> | 2007-09-18 17:41:17 +0000 |
commit | 55613bf9cd7d6071e43e68ac14bc0243a1027507 (patch) | |
tree | 9b151f94d94e7dc3aa5988c03867d3f6f6b562ba /src/backend/utils/mb/wchar.c | |
parent | 8544110042ddf8be29e177e37f53516686a06da2 (diff) | |
Close previously open holes for invalidly encoded data to enter the
database via builtin functions, as recently discussed on -hackers.
chr() now returns a character in the database encoding. For UTF8-encoded databases
the argument is treated as a Unicode code point. For other multibyte encodings
the argument must designate a strict ASCII character, or an error is raised,
as is also the case if the argument is 0.
ascii() is adjusted so that it remains the inverse of chr().
The two-argument form of convert() is gone, and the three-argument form now
takes a bytea as its first argument and returns a bytea. To cover this loss, three new
functions are introduced:
- convert_from(bytea, name) returns text -- converts the first argument from the
  named encoding to the database encoding
- convert_to(text, name) returns bytea -- converts the first argument from the
  database encoding to the named encoding
- length(bytea, name) returns int -- gives the length of the first argument in
  characters in the named encoding
Diffstat (limited to 'src/backend/utils/mb/wchar.c')
-rw-r--r-- | src/backend/utils/mb/wchar.c | 42 |
1 files changed, 29 insertions, 13 deletions
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index cc8d4b58624..2c98f4b476e 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,9 +1,7 @@ /* * conversion functions between pg_wchar and multibyte streams. * Tatsuo Ishii - * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.63 2007/07/12 21:17:09 tgl Exp $ - * - * WIN1250 client encoding updated by Pavel Behal + * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.64 2007/09/18 17:41:17 adunstan Exp $ * */ /* can be used in either frontend or backend */ @@ -1435,23 +1433,37 @@ pg_database_encoding_max_length(void) bool pg_verifymbstr(const char *mbstr, int len, bool noError) { - return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError); + return + pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0; } /* * Verify mbstr to make sure that it is validly encoded in the specified * encoding. * + */ +bool +pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) +{ + return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0; +} + +/* + * Verify mbstr to make sure that it is validly encoded in the specified + * encoding. + * * mbstr is not necessarily zero terminated; length of mbstr is * specified by len. * - * If OK, return TRUE. If a problem is found, return FALSE when noError is + * If OK, return length of string in the encoding. + * If a problem is found, return -1 when noError is * true; when noError is false, ereport() a descriptive message. 
- */ -bool -pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) + */ +int +pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError) { mbverifier mbverify; + int mb_len; Assert(PG_VALID_ENCODING(encoding)); @@ -1463,14 +1475,16 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) const char *nullpos = memchr(mbstr, 0, len); if (nullpos == NULL) - return true; + return len; if (noError) - return false; + return -1; report_invalid_encoding(encoding, nullpos, 1); } /* fetch function pointer just once */ mbverify = pg_wchar_table[encoding].mbverify; + + mb_len = 0; while (len > 0) { @@ -1481,12 +1495,13 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) { if (*mbstr != '\0') { + mb_len++; mbstr++; len--; continue; } if (noError) - return false; + return -1; report_invalid_encoding(encoding, mbstr, len); } @@ -1495,14 +1510,15 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) if (l < 0) { if (noError) - return false; + return -1; report_invalid_encoding(encoding, mbstr, len); } mbstr += l; len -= l; + mb_len++; } - return true; + return mb_len; } /* |