diff options
author | Noah Misch <noah@leadboat.com> | 2017-11-11 11:10:53 -0800 |
---|---|---|
committer | Noah Misch <noah@leadboat.com> | 2017-11-11 11:11:28 -0800 |
commit | d8406b9de910bf77d74ac644817b42fb4cf2ad03 (patch) | |
tree | 6d4468ee82ee78c1e50b2e900d3e04857b7e1387 /src/backend/utils/adt/xml.c | |
parent | 62906461c0d2765e0b7f8222b080670ddf1825b0 (diff) |
Ignore XML declaration in xpath_internal(), for UTF8 databases.
When a value contained an XML declaration naming some other encoding,
this function interpreted UTF8 bytes as the named encoding, yielding
mojibake. xml_parse() already has similar logic. This would be
necessary but not sufficient for non-UTF8 databases, so preserve
behavior there until the xpath facility can support such databases
comprehensively. Back-patch to 9.3 (all supported versions).
Pavel Stehule and Noah Misch
Discussion: https://postgr.es/m/CAFj8pRC-dM=tT=QkGi+Achkm+gwPmjyOayGuUfXVumCxkDgYWg@mail.gmail.com
Diffstat (limited to 'src/backend/utils/adt/xml.c')
-rw-r--r-- | src/backend/utils/adt/xml.c | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index b27ab6a23c5..ad78af248ef 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -3792,6 +3792,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, int32 xpath_len; xmlChar *string; xmlChar *xpath_expr; + size_t xmldecl_len = 0; int i; int ndim; Datum *ns_names_uris; @@ -3852,6 +3853,16 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len); xpath_expr[xpath_len] = '\0'; + /* + * In a UTF8 database, skip any xml declaration, which might assert + * another encoding. Ignore parse_xml_decl() failure, letting + * xmlCtxtReadMemory() report parse errors. Documentation disclaims + * xpath() support for non-ASCII data in non-UTF8 databases, so leave + * those scenarios bug-compatible with historical behavior. + */ + if (GetDatabaseEncoding() == PG_UTF8) + parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL); + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); PG_TRY(); @@ -3866,7 +3877,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, if (ctxt == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); - doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0); + doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len, + len - xmldecl_len, NULL, NULL, 0); if (doc == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "could not parse XML document"); |