From c2bb0378cfcba28d57e357d0daa5ec895a51d8a8 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 22 Sep 2009 23:52:53 +0000 Subject: Unicode escapes in E'...' strings Author: Marko Kreen --- doc/src/sgml/syntax.sgml | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index c805e2e7141..73db3235bd6 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -1,4 +1,4 @@ - + SQL Syntax @@ -398,6 +398,14 @@ SELECT 'foo' 'bar'; hexadecimal byte value + + + \uxxxx, + \Uxxxxxxxx + (x = 0 - 9, A - F) + + 16- or 32-bit hexadecimal Unicode character value + @@ -411,13 +419,25 @@ SELECT 'foo' 'bar'; - It is your responsibility that the byte sequences you create are + It is your responsibility that the byte sequences you create, + especially when using the octal or hexadecimal escapes, compose valid characters in the server character set encoding. When the - server encoding is UTF-8, then the alternative Unicode escape - syntax, explained in the section "String Constants with Unicode Escapes", - should be used instead. (The alternative would be doing the - UTF-8 encoding by hand and writing out the bytes, which would be - very cumbersome.) + server encoding is UTF-8, then the Unicode escapes or the + alternative Unicode escape syntax, explained + in the section "String Constants with Unicode Escapes", should be used + instead. (The alternative would be doing the UTF-8 encoding by + hand and writing out the bytes, which would be very cumbersome.) + + + + The Unicode escape syntax works fully only when the server + encoding is UTF-8. When other server encodings are used, only + code points in the ASCII range (up to \u007F) can be + specified. Both the 4-digit and the 8-digit form can be used to + specify UTF-16 surrogate pairs to compose characters with code + points larger than U+FFFF (although the + availability of the 8-digit form technically makes this + unnecessary). -- cgit v1.2.3