summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2016-12-21 17:39:33 -0500
committer Tom Lane <tgl@sss.pgh.pa.us> 2016-12-21 17:39:33 -0500
commit a57695d9a815ce355e17c5cdec181f87bc9de3f9 (patch)
tree 7c461649a7899b435501ccb458c5552076e936d4
parent 1f2cfd26f50d3a7d885961aa68361d40d7753949 (diff)
Fix detection of unfinished Unicode surrogate pair at end of string.
The U&'...' and U&"..." syntaxes silently discarded a surrogate pair start (that is, a code between U+D800 and U+DBFF) if it occurred at the very end of the string. This seems like an obvious oversight, since we throw an error for every other invalid combination of surrogate characters, including the very same situation in E'...' syntax.

This has been wrong since the pair processing was added (in 9.0), so back-patch to all supported branches.

Discussion: https://postgr.es/m/19113.1482337898@sss.pgh.pa.us
-rw-r--r-- src/backend/parser/scan.l 8
1 files changed, 8 insertions, 0 deletions
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 5364942701c..85cf7a35cb1 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -1395,7 +1395,15 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
}
}
+ /* unfinished surrogate pair? */
+ if (pair_first)
+ {
+ ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
+ yyerror("invalid Unicode surrogate pair");
+ }
+
*out = '\0';
+
/*
* We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
* codes; but it's probably not worth the trouble, since this isn't