summaryrefslogtreecommitdiff
path: root/src/backend/utils/mb/mbutils.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2022-12-11 11:28:15 -0500
committer Tom Lane <tgl@sss.pgh.pa.us> 2022-12-11 11:28:15 -0500
commit c60c9badba9b4db3155eef081ae0b923f983168c (patch)
tree 60b12d49fa9f01feff7707cef18fac7ebf24ab18 /src/backend/utils/mb/mbutils.c
parent 50428a301d5ad46316cac2192f2ca8d91898aa3c (diff)
Convert json_in and jsonb_in to report errors softly.
This requires a bit of further infrastructure-extension to allow trapping errors reported by numeric_in and pg_unicode_to_server, but otherwise it's pretty straightforward.

In the case of jsonb_in, we are only capturing errors reported during the initial "parse" phase. The value-construction phase (JsonbValueToJsonb) can also throw errors if assorted implementation limits are exceeded. We should improve that, but it seems like a separable project.

Andrew Dunstan and Tom Lane

Discussion: https://postgr.es/m/3bac9841-fe07-713d-fa42-606c225567d6@dunslane.net
Diffstat (limited to 'src/backend/utils/mb/mbutils.c')
-rw-r--r-- src/backend/utils/mb/mbutils.c 57
1 files changed, 57 insertions, 0 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 474ab476f5f..24f37e3ec98 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -916,6 +916,63 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
BoolGetDatum(false));
}
+/*
+ * Convert a single Unicode code point into a string in the server encoding.
+ *
+ * Same as pg_unicode_to_server(), except that we don't throw errors,
+ * but simply return false on conversion failure.
+ *
+ * c is the code point to convert; s is the caller-supplied output buffer.
+ * On the ASCII and UTF-8 paths this function itself writes the bytes and a
+ * terminating NUL into s; on the remaining path s is handed to
+ * Utf8ToServerConvProc as the destination.  The size of s is not checked
+ * here (presumably the caller must provide at least
+ * MAX_MULTIBYTE_CHAR_LEN + 1 bytes, as for the local UTF-8 buffer;
+ * confirm against pg_unicode_to_server()).
+ *
+ * Returns true on success.  Returns false if c is not a valid Unicode code
+ * point, if the server encoding is not UTF-8 and no conversion function is
+ * available, or if the conversion function did not consume the whole
+ * input.  Callers should not rely on the contents of s after a false
+ * return.
+ */
+bool
+pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
+{
+ unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1]; /* UTF-8 form of c, plus NUL */
+ int c_as_utf8_len; /* byte length of c_as_utf8, excluding NUL */
+ int converted_len; /* value returned by the conversion function */
+ int server_encoding; /* result of GetDatabaseEncoding() */
+
+ /* Fail if invalid Unicode code point */
+ if (!is_valid_unicode_codepoint(c))
+ return false;
+
+ /*
+ * Otherwise, if it's in ASCII range, conversion is trivial: the code
+ * point is stored as a single byte regardless of the server encoding.
+ */
+ if (c <= 0x7F)
+ {
+ s[0] = (unsigned char) c;
+ s[1] = '\0';
+ return true;
+ }
+
+ /* If the server encoding is UTF-8, we just need to reformat the code */
+ server_encoding = GetDatabaseEncoding();
+ if (server_encoding == PG_UTF8)
+ {
+ unicode_to_utf8(c, s);
+ /* pg_utf_mblen(s) gives the offset at which to place the terminator */
+ s[pg_utf_mblen(s)] = '\0';
+ return true;
+ }
+
+ /*
+ * For all other cases, we must have a conversion function available.
+ * Its absence is reported as an ordinary conversion failure here.
+ */
+ if (Utf8ToServerConvProc == NULL)
+ return false;
+
+ /* Construct UTF-8 source string */
+ unicode_to_utf8(c, c_as_utf8);
+ c_as_utf8_len = pg_utf_mblen(c_as_utf8);
+ c_as_utf8[c_as_utf8_len] = '\0';
+
+ /*
+ * Convert, but without throwing error if we can't.  The arguments are:
+ * source encoding, destination encoding, source string, destination
+ * buffer, source length, and a final boolean that is true here where
+ * pg_unicode_to_server() passes false (presumably the conversion
+ * function's "no error" flag; confirm against the conversion function
+ * signature).
+ */
+ converted_len = DatumGetInt32(FunctionCall6(Utf8ToServerConvProc,
+ Int32GetDatum(PG_UTF8),
+ Int32GetDatum(server_encoding),
+ CStringGetDatum((char *) c_as_utf8),
+ CStringGetDatum((char *) s),
+ Int32GetDatum(c_as_utf8_len),
+ BoolGetDatum(true)));
+
+ /*
+ * Conversion was successful iff it consumed the whole input, i.e. the
+ * returned count equals the length of the UTF-8 source string.
+ */
+ return (converted_len == c_as_utf8_len);
+}
+
+
/* convert a multibyte string to a wchar */
int