Fail pgwin32_message_to_UTF16() for SQL_ASCII messages.

The function had been interpreting SQL_ASCII messages as UTF8, throwing an error when they were not valid UTF8. It now returns NULL for SQL_ASCII messages instead of attempting a conversion; the new behavior is consistent with pg_do_encoding_conversion(). This affects LOG_DESTINATION_STDERR and LOG_DESTINATION_EVENTLOG, which will now send the untranslated bytes to write() and ReportEventA(). On buildfarm member bowerbird, enabling log_connections caused an error whenever the role name was not valid UTF8. Back-patch to 9.4 (all supported versions).

Discussion: https://postgr.es/m/20190512015615.GD1124997@rfd.leadboat.com
author: Noah Misch <noah@leadboat.com> 2019-05-12 10:33:05 -0700
committer: Noah Misch <noah@leadboat.com> 2019-05-12 10:33:08 -0700
commit: 409f5303ced62246d36cabb8d4c5da5f7ce7f376 (patch)
tree: 9ed6ac8ebc318a6fea4dc82d921e89fba3ff96aa /src
parent: c3d113136bbaa86fbf1edde7aaf70ba06a6166b7 (diff)
3 files changed, 11 insertions, 10 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index e6f9151574d..8a04426990c 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -1057,11 +1057,16 @@ GetMessageEncoding(void)
 WCHAR *
 pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
 {
+	int			msgenc = GetMessageEncoding();
 	WCHAR	   *utf16;
 	int			dstlen;
 	UINT		codepage;
 
-	codepage = pg_enc2name_tbl[GetMessageEncoding()].codepage;
+	if (msgenc == PG_SQL_ASCII)
+		/* No conversion is possible, and SQL_ASCII is never utf16. */
+		return NULL;
+
+	codepage = pg_enc2name_tbl[msgenc].codepage;
 
 	/*
 	 * Use MultiByteToWideChar directly if there is a corresponding codepage,
@@ -1086,7 +1091,7 @@ pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
 		{
 			utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
 													  len,
-													  GetMessageEncoding(),
+													  msgenc,
 													  PG_UTF8);
 			if (utf8 != str)
 				len = strlen(utf8);
diff --git a/src/bin/pg_dump/t/010_dump_connstr.pl b/src/bin/pg_dump/t/010_dump_connstr.pl
index c592cb6ff66..6807b324669 100644
--- a/src/bin/pg_dump/t/010_dump_connstr.pl
+++ b/src/bin/pg_dump/t/010_dump_connstr.pl
@@ -14,10 +14,8 @@ else
 	plan tests => 14;
 }
 
-# In a SQL_ASCII database, pgwin32_message_to_UTF16() needs to
-# interpret everything as UTF8.  We're going to use byte sequences
-# that aren't valid UTF-8 strings, so that would fail.  Use LATIN1,
-# which accepts any byte and has a conversion from each byte to UTF-8.
+# We're going to use byte sequences that aren't valid UTF-8 strings.  Use
+# LATIN1, which accepts any byte and has a conversion from each byte to UTF-8.
 $ENV{LC_ALL}           = 'C';
 $ENV{PGCLIENTENCODING} = 'LATIN1';
 
diff --git a/src/bin/scripts/t/200_connstr.pl b/src/bin/scripts/t/200_connstr.pl
index a3aeee762f3..ee2523d0858 100644
--- a/src/bin/scripts/t/200_connstr.pl
+++ b/src/bin/scripts/t/200_connstr.pl
@@ -7,10 +7,8 @@ use Test::More tests => 3;
 
 # Tests to check connection string handling in utilities
 
-# In a SQL_ASCII database, pgwin32_message_to_UTF16() needs to
-# interpret everything as UTF8.  We're going to use byte sequences
-# that aren't valid UTF-8 strings, so that would fail.  Use LATIN1,
-# which accepts any byte and has a conversion from each byte to UTF-8.
+# We're going to use byte sequences that aren't valid UTF-8 strings.  Use
+# LATIN1, which accepts any byte and has a conversion from each byte to UTF-8.
 $ENV{LC_ALL}           = 'C';
 $ENV{PGCLIENTENCODING} = 'LATIN1';
author	Noah Misch <noah@leadboat.com>	2019-05-12 10:33:05 -0700
committer	Noah Misch <noah@leadboat.com>	2019-05-12 10:33:08 -0700
commit	409f5303ced62246d36cabb8d4c5da5f7ce7f376 (patch)
tree	9ed6ac8ebc318a6fea4dc82d921e89fba3ff96aa /src
parent	c3d113136bbaa86fbf1edde7aaf70ba06a6166b7 (diff)