summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2020-03-30 11:14:58 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2020-03-30 11:14:58 -0400
commit: f15f5edee5b40812b5ac3e4cf431a507a4991b5e (patch)
tree: 189dac18f3ef0264a4982572d0c0dd8c6fc3c017 /src
parent: a9120b0163f308b0a20ac473cc45499991e4e32f (diff)
Be more careful about extracting encoding from locale strings on Windows.
GetLocaleInfoEx() can fail on strings that setlocale() was perfectly happy with. A common way for that to happen is if the locale string is actually a Unix-style string, say "et_EE.UTF-8". In that case, what's after the dot is an encoding name, not a Windows codepage number; blindly treating it as a codepage number led to failure, with a fairly silly error message. Hence, check to see if what's after the dot is all digits, and if not, treat it as a literal encoding name rather than a codepage number. This will do the right thing with many Unix-style locale strings, and produce a more sensible error message otherwise.

Somewhat independently of that, treat a zero (CP_ACP) result from GetLocaleInfoEx() as meaning that we must use UTF-8 encoding.

Back-patch to all supported branches.

Juan José Santamaría Flecha

Discussion: https://postgr.es/m/24905.1585445371@sss.pgh.pa.us
Diffstat (limited to 'src')
-rw-r--r-- src/port/chklocale.c 29
1 files changed, 24 insertions, 5 deletions
diff --git a/src/port/chklocale.c b/src/port/chklocale.c
index 3c0ef6a2530..365e7f466bc 100644
--- a/src/port/chklocale.c
+++ b/src/port/chklocale.c
@@ -244,25 +244,44 @@ win32_langinfo(const char *ctype)
{
r = malloc(16); /* excess */
if (r != NULL)
- sprintf(r, "CP%u", cp);
+ {
+ /*
+ * If the return value is CP_ACP that means no ANSI code page is
+ * available, so only Unicode can be used for the locale.
+ */
+ if (cp == CP_ACP)
+ strcpy(r, "utf8");
+ else
+ sprintf(r, "CP%u", cp);
+ }
}
else
#endif
{
/*
- * Locale format on Win32 is <Language>_<Country>.<CodePage> . For
- * example, English_United States.1252.
+ * Locale format on Win32 is <Language>_<Country>.<CodePage>. For
+ * example, English_United States.1252. If we see digits after the
+ * last dot, assume it's a codepage number. Otherwise, we might be
+ * dealing with a Unix-style locale string; Windows' setlocale() will
+ * take those even though GetLocaleInfoEx() won't, so we end up here.
+ * In that case, just return what's after the last dot and hope we can
+ * find it in our table.
*/
codepage = strrchr(ctype, '.');
if (codepage != NULL)
{
- int ln;
+ size_t ln;
codepage++;
ln = strlen(codepage);
r = malloc(ln + 3);
if (r != NULL)
- sprintf(r, "CP%s", codepage);
+ {
+ if (strspn(codepage, "0123456789") == ln)
+ sprintf(r, "CP%s", codepage);
+ else
+ strcpy(r, codepage);
+ }
}
}