mirror of https://github.com/postgres/postgres
Unicode escapes in strings and identifiers
This commit is contained in:
parent
05bba3d176
commit
06735e3256
|
@ -1,4 +1,4 @@
|
|||
<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.123 2008/06/26 22:24:42 momjian Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.124 2008/10/29 08:04:52 petere Exp $ -->
|
||||
|
||||
<chapter id="sql-syntax">
|
||||
<title>SQL Syntax</title>
|
||||
|
@ -189,6 +189,57 @@ UPDATE "my_table" SET "a" = 5;
|
|||
ampersands. The length limitation still applies.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<indexterm><primary>Unicode escape</primary><secondary>in
|
||||
identifiers</secondary></indexterm> A variant of quoted
|
||||
identifiers allows including escaped Unicode characters identified
|
||||
by their code points. This variant starts
|
||||
with <literal>U&</literal> (upper or lower case U followed by
|
||||
ampersand) immediately before the opening double quote, without
|
||||
any spaces in between, for example <literal>U&"foo"</literal>.
|
||||
(Note that this creates an ambiguity with the
|
||||
operator <literal>&</literal>. Use spaces around the operator to
|
||||
avoid this problem.) Inside the quotes, Unicode characters can be
|
||||
specified in escaped form by writing a backslash followed by the
|
||||
four-digit hexadecimal code point number or alternatively a
|
||||
backslash followed by a plus sign followed by a six-digit
|
||||
hexadecimal code point number. For example, the
|
||||
identifier <literal>"data"</literal> could be written as
|
||||
<programlisting>
|
||||
U&"d\0061t\+000061"
|
||||
</programlisting>
|
||||
The following less trivial example writes the Russian
|
||||
word <quote>slon</quote> (elephant) in Cyrillic letters:
|
||||
<programlisting>
|
||||
U&"\0441\043B\043E\043D"
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If a different escape character than backslash is desired, it can
|
||||
be specified using
|
||||
the <literal>UESCAPE</literal><indexterm><primary>UESCAPE</primary></indexterm>
|
||||
clause after the string, for example:
|
||||
<programlisting>
|
||||
U&"d!0061t!+000061" UESCAPE '!'
|
||||
</programlisting>
|
||||
The escape character can be any single character other than a
|
||||
hexadecimal digit, the plus sign, a single quote, a double quote,
|
||||
or a whitespace character. Note that the escape character is
|
||||
written in single quotes, not double quotes.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To include the escape character in the identifier literally, write
|
||||
it twice.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The Unicode escape syntax works fully only when the server encoding is
|
||||
UTF8. When other server encodings are used, only code points in
|
||||
the ASCII range (up to <literal>\007F</literal>) can be specified.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Quoting an identifier also makes it case-sensitive, whereas
|
||||
unquoted names are always folded to lower case. For example, the
|
||||
|
@ -245,7 +296,7 @@ UPDATE "my_table" SET "a" = 5;
|
|||
write two adjacent single quotes, e.g.
|
||||
<literal>'Dianne''s horse'</literal>.
|
||||
Note that this is <emphasis>not</> the same as a double-quote
|
||||
character (<literal>"</>).
|
||||
character (<literal>"</>). <!-- font-lock sanity: " -->
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
@ -269,14 +320,19 @@ SELECT 'foo' 'bar';
|
|||
by <acronym>SQL</acronym>; <productname>PostgreSQL</productname> is
|
||||
following the standard.)
|
||||
</para>
|
||||
</sect3>
|
||||
|
||||
<para>
|
||||
<indexterm>
|
||||
<sect3 id="sql-syntax-strings-escape">
|
||||
<title>String Constants with C-Style Escapes</title>
|
||||
|
||||
<indexterm zone="sql-syntax-strings-escape">
|
||||
<primary>escape string syntax</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<indexterm zone="sql-syntax-strings-escape">
|
||||
<primary>backslash escapes</primary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
<productname>PostgreSQL</productname> also accepts <quote>escape</>
|
||||
string constants, which are an extension to the SQL standard.
|
||||
An escape string constant is specified by writing the letter
|
||||
|
@ -287,7 +343,8 @@ SELECT 'foo' 'bar';
|
|||
Within an escape string, a backslash character (<literal>\</>) begins a
|
||||
C-like <firstterm>backslash escape</> sequence, in which the combination
|
||||
of backslash and following character(s) represents a special byte
|
||||
value:
|
||||
value, as shown in <xref linkend="sql-backslash-table">.
|
||||
</para>
|
||||
|
||||
<table id="sql-backslash-table">
|
||||
<title>Backslash Escape Sequences</title>
|
||||
|
@ -341,14 +398,24 @@ SELECT 'foo' 'bar';
|
|||
</tgroup>
|
||||
</table>
|
||||
|
||||
It is your responsibility that the byte sequences you create are
|
||||
valid characters in the server character set encoding. Any other
|
||||
<para>
|
||||
Any other
|
||||
character following a backslash is taken literally. Thus, to
|
||||
include a backslash character, write two backslashes (<literal>\\</>).
|
||||
Also, a single quote can be included in an escape string by writing
|
||||
<literal>\'</literal>, in addition to the normal way of <literal>''</>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
It is your responsibility that the byte sequences you create are
|
||||
valid characters in the server character set encoding. When the
|
||||
server encoding is UTF-8, then the alternative Unicode escape
|
||||
syntax, explained in <xref linkend="sql-syntax-strings-uescape">,
|
||||
should be used instead. (The alternative would be doing the
|
||||
UTF-8 encoding by hand and writing out the bytes, which would be
|
||||
very cumbersome.)
|
||||
</para>
|
||||
|
||||
<caution>
|
||||
<para>
|
||||
If the configuration parameter
|
||||
|
@ -379,6 +446,65 @@ SELECT 'foo' 'bar';
|
|||
</para>
|
||||
</sect3>
|
||||
|
||||
<sect3 id="sql-syntax-strings-uescape">
|
||||
<title>String Constants with Unicode Escapes</title>
|
||||
|
||||
<indexterm zone="sql-syntax-strings-uescape">
|
||||
<primary>Unicode escape</primary>
|
||||
<secondary>in string constants</secondary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
<productname>PostgreSQL</productname> also supports another type
|
||||
of escape syntax for strings that allows specifying arbitrary
|
||||
Unicode characters by code point. A Unicode escape string
|
||||
constant starts with <literal>U&</literal> (upper or lower case
|
||||
letter U followed by ampersand) immediately before the opening
|
||||
quote, without any spaces in between, for
|
||||
example <literal>U&'foo'</literal>. (Note that this creates an
|
||||
ambiguity with the operator <literal>&</literal>. Use spaces
|
||||
around the operator to avoid this problem.) Inside the quotes,
|
||||
Unicode characters can be specified in escaped form by writing a
|
||||
backslash followed by the four-digit hexadecimal code point
|
||||
number or alternatively a backslash followed by a plus sign
|
||||
followed by a six-digit hexadecimal code point number. For
|
||||
example, the string <literal>'data'</literal> could be written as
|
||||
<programlisting>
|
||||
U&'d\0061t\+000061'
|
||||
</programlisting>
|
||||
The following less trivial example writes the Russian
|
||||
word <quote>slon</quote> (elephant) in Cyrillic letters:
|
||||
<programlisting>
|
||||
U&'\0441\043B\043E\043D'
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If a different escape character than backslash is desired, it can
|
||||
be specified using
|
||||
the <literal>UESCAPE</literal><indexterm><primary>UESCAPE</primary></indexterm>
|
||||
clause after the string, for example:
|
||||
<programlisting>
|
||||
U&'d!0061t!+000061' UESCAPE '!'
|
||||
</programlisting>
|
||||
The escape character can be any single character other than a
|
||||
hexadecimal digit, the plus sign, a single quote, a double quote,
|
||||
or a whitespace character.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The Unicode escape syntax works fully only when the server encoding is
|
||||
UTF8. When other server encodings are used, only code points in
|
||||
the ASCII range (up to <literal>\007F</literal>) can be
|
||||
specified.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To include the escape character in the string literally, write it
|
||||
twice.
|
||||
</para>
|
||||
</sect3>
|
||||
|
||||
<sect3 id="sql-syntax-dollar-quoting">
|
||||
<title>Dollar-Quoted String Constants</title>
|
||||
|
||||
|
|
|
@ -238,8 +238,8 @@ F381 Extended schema manipulation 02 ALTER TABLE statement: ADD CONSTRAINT claus
|
|||
F381 Extended schema manipulation 03 ALTER TABLE statement: DROP CONSTRAINT clause YES
|
||||
F382 Alter column data type YES
|
||||
F391 Long identifiers YES
|
||||
F392 Unicode escapes in identifiers NO
|
||||
F393 Unicode escapes in literals NO
|
||||
F392 Unicode escapes in identifiers YES
|
||||
F393 Unicode escapes in literals YES
|
||||
F394 Optional normal form specification NO
|
||||
F401 Extended joined table YES
|
||||
F401 Extended joined table 01 NATURAL JOIN YES
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.146 2008/09/01 20:42:45 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.147 2008/10/29 08:04:52 petere Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -76,6 +76,7 @@ static int literalalloc; /* current allocated buffer size */
|
|||
static void addlit(char *ytext, int yleng);
|
||||
static void addlitchar(unsigned char ychar);
|
||||
static char *litbufdup(void);
|
||||
static char *litbuf_udeescape(unsigned char escape);
|
||||
|
||||
#define lexer_errposition() scanner_errposition(yylloc)
|
||||
|
||||
|
@ -125,6 +126,8 @@ static unsigned char unescape_single_char(unsigned char c);
|
|||
* <xq> standard quoted strings
|
||||
* <xe> extended quoted strings (support backslash escape sequences)
|
||||
* <xdolq> $foo$ quoted strings
|
||||
* <xui> quoted identifier with Unicode escapes
|
||||
* <xus> quoted string with Unicode escapes
|
||||
*/
|
||||
|
||||
%x xb
|
||||
|
@ -134,6 +137,8 @@ static unsigned char unescape_single_char(unsigned char c);
|
|||
%x xe
|
||||
%x xq
|
||||
%x xdolq
|
||||
%x xui
|
||||
%x xus
|
||||
|
||||
/*
|
||||
* In order to make the world safe for Windows and Mac clients as well as
|
||||
|
@ -244,6 +249,25 @@ xdstop {dquote}
|
|||
xddouble {dquote}{dquote}
|
||||
xdinside [^"]+
|
||||
|
||||
/* Unicode escapes */
|
||||
uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
|
||||
/* error rule to avoid backup */
|
||||
uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
|
||||
|
||||
/* Quoted identifier with Unicode escapes */
|
||||
xuistart [uU]&{dquote}
|
||||
xuistop1 {dquote}{whitespace}*{uescapefail}?
|
||||
xuistop2 {dquote}{whitespace}*{uescape}
|
||||
|
||||
/* Quoted string with Unicode escapes */
|
||||
xusstart [uU]&{quote}
|
||||
xusstop1 {quote}{whitespace}*{uescapefail}?
|
||||
xusstop2 {quote}{whitespace}*{uescape}
|
||||
|
||||
/* error rule to avoid backup */
|
||||
xufailed [uU]&
|
||||
|
||||
|
||||
/* C-style comments
|
||||
*
|
||||
* The "extended comment" syntax closely resembles allowable operator syntax.
|
||||
|
@ -444,6 +468,11 @@ other .
|
|||
BEGIN(xe);
|
||||
startlit();
|
||||
}
|
||||
{xusstart} {
|
||||
SET_YYLLOC();
|
||||
BEGIN(xus);
|
||||
startlit();
|
||||
}
|
||||
<xq,xe>{quotestop} |
|
||||
<xq,xe>{quotefail} {
|
||||
yyless(1);
|
||||
|
@ -456,10 +485,22 @@ other .
|
|||
yylval.str = litbufdup();
|
||||
return SCONST;
|
||||
}
|
||||
<xq,xe>{xqdouble} {
|
||||
<xus>{xusstop1} {
|
||||
/* throw back all but the quote */
|
||||
yyless(1);
|
||||
BEGIN(INITIAL);
|
||||
yylval.str = litbuf_udeescape('\\');
|
||||
return SCONST;
|
||||
}
|
||||
<xus>{xusstop2} {
|
||||
BEGIN(INITIAL);
|
||||
yylval.str = litbuf_udeescape(yytext[yyleng-2]);
|
||||
return SCONST;
|
||||
}
|
||||
<xq,xe,xus>{xqdouble} {
|
||||
addlitchar('\'');
|
||||
}
|
||||
<xq>{xqinside} {
|
||||
<xq,xus>{xqinside} {
|
||||
addlit(yytext, yyleng);
|
||||
}
|
||||
<xe>{xeinside} {
|
||||
|
@ -496,14 +537,14 @@ other .
|
|||
if (IS_HIGHBIT_SET(c))
|
||||
saw_high_bit = true;
|
||||
}
|
||||
<xq,xe>{quotecontinue} {
|
||||
<xq,xe,xus>{quotecontinue} {
|
||||
/* ignore */
|
||||
}
|
||||
<xe>. {
|
||||
/* This is only needed for \ just before EOF */
|
||||
addlitchar(yytext[0]);
|
||||
}
|
||||
<xq,xe><<EOF>> { yyerror("unterminated quoted string"); }
|
||||
<xq,xe,xus><<EOF>> { yyerror("unterminated quoted string"); }
|
||||
|
||||
{dolqdelim} {
|
||||
SET_YYLLOC();
|
||||
|
@ -553,6 +594,11 @@ other .
|
|||
BEGIN(xd);
|
||||
startlit();
|
||||
}
|
||||
{xuistart} {
|
||||
SET_YYLLOC();
|
||||
BEGIN(xui);
|
||||
startlit();
|
||||
}
|
||||
<xd>{xdstop} {
|
||||
char *ident;
|
||||
|
||||
|
@ -565,13 +611,46 @@ other .
|
|||
yylval.str = ident;
|
||||
return IDENT;
|
||||
}
|
||||
<xd>{xddouble} {
|
||||
<xui>{xuistop1}	{
					/*
					 * End of a U&"..." identifier that is not followed by a
					 * complete UESCAPE clause: de-escape using the default
					 * escape character (backslash) and return it as IDENT.
					 */
					char	   *ident;
					int			identlen;

					BEGIN(INITIAL);
					if (literallen == 0)
						yyerror("zero-length delimited identifier");
					ident = litbuf_udeescape('\\');
					/*
					 * Apply the NAMEDATALEN limit to the de-escaped
					 * identifier.  literallen counts the still-escaped
					 * text, which can be longer than the result, so using
					 * it here could request truncation of a name that
					 * actually fits.
					 */
					identlen = strlen(ident);
					if (identlen >= NAMEDATALEN)
						truncate_identifier(ident, identlen, true);
					yylval.str = ident;
					/* throw back all but the quote */
					yyless(1);
					return IDENT;
				}
|
||||
<xui>{xuistop2}	{
					/*
					 * End of a U&"..." identifier followed by a complete
					 * UESCAPE 'c' clause.  The match ends with 'c', so the
					 * escape character is the second-to-last byte of
					 * yytext.
					 */
					char	   *ident;
					int			identlen;

					BEGIN(INITIAL);
					if (literallen == 0)
						yyerror("zero-length delimited identifier");
					ident = litbuf_udeescape(yytext[yyleng - 2]);
					/*
					 * Apply the NAMEDATALEN limit to the de-escaped
					 * identifier.  literallen counts the still-escaped
					 * text, which can be longer than the result, so using
					 * it here could request truncation of a name that
					 * actually fits.
					 */
					identlen = strlen(ident);
					if (identlen >= NAMEDATALEN)
						truncate_identifier(ident, identlen, true);
					yylval.str = ident;
					return IDENT;
				}
|
||||
<xd,xui>{xddouble} {
|
||||
addlitchar('"');
|
||||
}
|
||||
<xd>{xdinside} {
|
||||
<xd,xui>{xdinside} {
|
||||
addlit(yytext, yyleng);
|
||||
}
|
||||
<xd><<EOF>> { yyerror("unterminated quoted identifier"); }
|
||||
<xd,xui><<EOF>> { yyerror("unterminated quoted identifier"); }
|
||||
|
||||
{xufailed} {
|
||||
/* throw back all but the initial u/U */
|
||||
yyless(1);
|
||||
/* and treat it as {other} */
|
||||
return yytext[0];
|
||||
}
|
||||
|
||||
{typecast} {
|
||||
SET_YYLLOC();
|
||||
|
@ -908,6 +987,99 @@ litbufdup(void)
|
|||
return new;
|
||||
}
|
||||
|
||||
static int
|
||||
hexval(unsigned char c)
|
||||
{
|
||||
if (c >= '0' && c <= '9')
|
||||
return c - '0';
|
||||
if (c >= 'a' && c <= 'f')
|
||||
return c - 'a' + 0xA;
|
||||
if (c >= 'A' && c <= 'F')
|
||||
return c - 'A' + 0xA;
|
||||
elog(ERROR, "invalid hexadecimal digit");
|
||||
return 0; /* not reached */
|
||||
}
|
||||
|
||||
static void
|
||||
check_unicode_value(pg_wchar c, char * loc)
|
||||
{
|
||||
if (GetDatabaseEncoding() == PG_UTF8)
|
||||
return;
|
||||
|
||||
if (c > 0x7F)
|
||||
{
|
||||
yylloc += (char *) loc - literalbuf + 3; /* 3 for U&" */
|
||||
yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
|
||||
}
|
||||
}
|
||||
|
||||
static char *
|
||||
litbuf_udeescape(unsigned char escape)
|
||||
{
|
||||
char *new;
|
||||
char *in, *out;
|
||||
|
||||
if (isxdigit(escape)
|
||||
|| escape == '+'
|
||||
|| escape == '\''
|
||||
|| escape == '"'
|
||||
|| scanner_isspace(escape))
|
||||
{
|
||||
yylloc += literallen + yyleng + 1;
|
||||
yyerror("invalid Unicode escape character");
|
||||
}
|
||||
|
||||
/*
|
||||
* This relies on the subtle assumption that a UTF-8 expansion
|
||||
* cannot be longer than its escaped representation.
|
||||
*/
|
||||
new = palloc(literallen + 1);
|
||||
|
||||
in = literalbuf;
|
||||
out = new;
|
||||
while (*in)
|
||||
{
|
||||
if (in[0] == escape)
|
||||
{
|
||||
if (in[1] == escape)
|
||||
{
|
||||
*out++ = escape;
|
||||
in += 2;
|
||||
}
|
||||
else if (isxdigit(in[1]) && isxdigit(in[2]) && isxdigit(in[3]) && isxdigit(in[4]))
|
||||
{
|
||||
pg_wchar unicode = hexval(in[1]) * 16*16*16 + hexval(in[2]) * 16*16 + hexval(in[3]) * 16 + hexval(in[4]);
|
||||
check_unicode_value(unicode, in);
|
||||
unicode_to_utf8(unicode, (unsigned char *) out);
|
||||
in += 5;
|
||||
out += pg_mblen(out);
|
||||
}
|
||||
else if (in[1] == '+'
|
||||
&& isxdigit(in[2]) && isxdigit(in[3])
|
||||
&& isxdigit(in[4]) && isxdigit(in[5])
|
||||
&& isxdigit(in[6]) && isxdigit(in[7]))
|
||||
{
|
||||
pg_wchar unicode = hexval(in[2]) * 16*16*16*16*16 + hexval(in[3]) * 16*16*16*16 + hexval(in[4]) * 16*16*16
|
||||
+ hexval(in[5]) * 16*16 + hexval(in[6]) * 16 + hexval(in[7]);
|
||||
check_unicode_value(unicode, in);
|
||||
unicode_to_utf8(unicode, (unsigned char *) out);
|
||||
in += 8;
|
||||
out += pg_mblen(out);
|
||||
}
|
||||
else
|
||||
{
|
||||
yylloc += in - literalbuf + 3; /* 3 for U&" */
|
||||
yyerror("invalid Unicode escape value");
|
||||
}
|
||||
}
|
||||
else
|
||||
*out++ = *in++;
|
||||
}
|
||||
|
||||
*out = '\0';
|
||||
pg_verifymbstr(new, out - new, false);
|
||||
return new;
|
||||
}
|
||||
|
||||
static unsigned char
|
||||
unescape_single_char(unsigned char c)
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.79 2008/10/14 17:12:33 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.80 2008/10/29 08:04:53 petere Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -1497,28 +1497,7 @@ unicode_to_sqlchar(pg_wchar c)
|
|||
{
|
||||
static unsigned char utf8string[5]; /* need trailing zero */
|
||||
|
||||
if (c <= 0x7F)
|
||||
{
|
||||
utf8string[0] = c;
|
||||
}
|
||||
else if (c <= 0x7FF)
|
||||
{
|
||||
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
|
||||
utf8string[1] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
else if (c <= 0xFFFF)
|
||||
{
|
||||
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
|
||||
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[2] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
else
|
||||
{
|
||||
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
|
||||
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
|
||||
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[3] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
unicode_to_utf8(c, utf8string);
|
||||
|
||||
return (char *) pg_do_encoding_conversion(utf8string,
|
||||
pg_mblen((char *) utf8string),
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* conversion functions between pg_wchar and multibyte streams.
|
||||
* Tatsuo Ishii
|
||||
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.67 2008/10/27 19:37:22 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.68 2008/10/29 08:04:53 petere Exp $
|
||||
*
|
||||
*/
|
||||
/* can be used in either frontend or backend */
|
||||
|
@ -419,6 +419,41 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
|||
return cnt;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
|
||||
* space allocated.
|
||||
*/
|
||||
unsigned char *
|
||||
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
|
||||
{
|
||||
if (c <= 0x7F)
|
||||
{
|
||||
utf8string[0] = c;
|
||||
}
|
||||
else if (c <= 0x7FF)
|
||||
{
|
||||
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
|
||||
utf8string[1] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
else if (c <= 0xFFFF)
|
||||
{
|
||||
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
|
||||
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[2] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
else
|
||||
{
|
||||
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
|
||||
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
|
||||
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[3] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
|
||||
return utf8string;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Return the byte length of a UTF8 character pointed to by s
|
||||
*
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.25 2008/05/09 15:36:31 petere Exp $
|
||||
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.26 2008/10/29 08:04:53 petere Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -156,6 +156,8 @@ static void emit(const char *txt, int len);
|
|||
* <xq> standard quoted strings
|
||||
* <xe> extended quoted strings (support backslash escape sequences)
|
||||
* <xdolq> $foo$ quoted strings
|
||||
* <xui> quoted identifier with Unicode escapes
|
||||
* <xus> quoted string with Unicode escapes
|
||||
*/
|
||||
|
||||
%x xb
|
||||
|
@ -165,6 +167,8 @@ static void emit(const char *txt, int len);
|
|||
%x xe
|
||||
%x xq
|
||||
%x xdolq
|
||||
%x xui
|
||||
%x xus
|
||||
/* Additional exclusive states for psql only: lex backslash commands */
|
||||
%x xslashcmd
|
||||
%x xslasharg
|
||||
|
@ -281,6 +285,25 @@ xdstop {dquote}
|
|||
xddouble {dquote}{dquote}
|
||||
xdinside [^"]+
|
||||
|
||||
/* Unicode escapes */
|
||||
uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
|
||||
/* error rule to avoid backup */
|
||||
uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
|
||||
|
||||
/* Quoted identifier with Unicode escapes */
|
||||
xuistart [uU]&{dquote}
|
||||
xuistop1 {dquote}{whitespace}*{uescapefail}?
|
||||
xuistop2 {dquote}{whitespace}*{uescape}
|
||||
|
||||
/* Quoted string with Unicode escapes */
|
||||
xusstart [uU]&{quote}
|
||||
xusstop1 {quote}{whitespace}*{uescapefail}?
|
||||
xusstop2 {quote}{whitespace}*{uescape}
|
||||
|
||||
/* error rule to avoid backup */
|
||||
xufailed [uU]&
|
||||
|
||||
|
||||
/* C-style comments
|
||||
*
|
||||
* The "extended comment" syntax closely resembles allowable operator syntax.
|
||||
|
@ -460,16 +483,29 @@ other .
|
|||
BEGIN(xe);
|
||||
ECHO;
|
||||
}
|
||||
{xusstart} {
|
||||
BEGIN(xus);
|
||||
ECHO;
|
||||
}
|
||||
<xq,xe>{quotestop} |
|
||||
<xq,xe>{quotefail} {
|
||||
yyless(1);
|
||||
BEGIN(INITIAL);
|
||||
ECHO;
|
||||
}
|
||||
<xq,xe>{xqdouble} {
|
||||
<xus>{xusstop1} {
|
||||
yyless(1);
|
||||
BEGIN(INITIAL);
|
||||
ECHO;
|
||||
}
|
||||
<xq>{xqinside} {
|
||||
<xus>{xusstop2} {
|
||||
BEGIN(INITIAL);
|
||||
ECHO;
|
||||
}
|
||||
<xq,xe,xus>{xqdouble} {
|
||||
ECHO;
|
||||
}
|
||||
<xq,xus>{xqinside} {
|
||||
ECHO;
|
||||
}
|
||||
<xe>{xeinside} {
|
||||
|
@ -484,7 +520,7 @@ other .
|
|||
<xe>{xehexesc} {
|
||||
ECHO;
|
||||
}
|
||||
<xq,xe>{quotecontinue} {
|
||||
<xq,xe,xus>{quotecontinue} {
|
||||
ECHO;
|
||||
}
|
||||
<xe>. {
|
||||
|
@ -535,14 +571,33 @@ other .
|
|||
BEGIN(xd);
|
||||
ECHO;
|
||||
}
|
||||
{xuistart} {
|
||||
BEGIN(xui);
|
||||
ECHO;
|
||||
}
|
||||
<xd>{xdstop} {
|
||||
BEGIN(INITIAL);
|
||||
ECHO;
|
||||
}
|
||||
<xd>{xddouble} {
|
||||
<xui>{xuistop1} {
|
||||
yyless(1);
|
||||
BEGIN(INITIAL);
|
||||
ECHO;
|
||||
}
|
||||
<xd>{xdinside} {
|
||||
<xui>{xuistop2} {
|
||||
BEGIN(INITIAL);
|
||||
ECHO;
|
||||
}
|
||||
<xd,xui>{xddouble} {
|
||||
ECHO;
|
||||
}
|
||||
<xd,xui>{xdinside} {
|
||||
ECHO;
|
||||
}
|
||||
|
||||
{xufailed} {
|
||||
/* throw back all but the initial u/U */
|
||||
yyless(1);
|
||||
ECHO;
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.80 2008/10/29 08:04:53 petere Exp $
|
||||
*
|
||||
* NOTES
|
||||
* This is used both by the backend and by libpq, but should not be
|
||||
|
@ -380,6 +380,7 @@ extern const char *GetDatabaseEncodingName(void);
|
|||
extern int pg_valid_client_encoding(const char *name);
|
||||
extern int pg_valid_server_encoding(const char *name);
|
||||
|
||||
extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
|
||||
extern int pg_utf_mblen(const unsigned char *);
|
||||
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
|
||||
int src_encoding,
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.166 2008/05/20 23:17:32 meskes Exp $
|
||||
* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.167 2008/10/29 08:04:53 petere Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -103,6 +103,8 @@ static struct _if_value
|
|||
* <xe> extended quoted strings (support backslash escape sequences)
|
||||
* <xn> national character quoted strings
|
||||
* <xdolq> $foo$ quoted strings
|
||||
* <xui> quoted identifier with Unicode escapes
|
||||
* <xus> quoted string with Unicode escapes
|
||||
*/
|
||||
|
||||
%x xb
|
||||
|
@ -117,6 +119,8 @@ static struct _if_value
|
|||
%x xdolq
|
||||
%x xcond
|
||||
%x xskip
|
||||
%x xui
|
||||
%x xus
|
||||
|
||||
/* Bit string
|
||||
*/
|
||||
|
@ -172,6 +176,18 @@ xdstop {dquote}
|
|||
xddouble {dquote}{dquote}
|
||||
xdinside [^"]+
|
||||
|
||||
/* Unicode escapes */
|
||||
/* (The ecpg scanner is not backup-free, so the fail rules in scan.l are not needed here, but could be added if desired.) */
|
||||
uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
|
||||
|
||||
/* Quoted identifier with Unicode escapes */
|
||||
xuistart [uU]&{dquote}
|
||||
xuistop {dquote}({whitespace}*{uescape})?
|
||||
|
||||
/* Quoted string with Unicode escapes */
|
||||
xusstart [uU]&{quote}
|
||||
xusstop {quote}({whitespace}*{uescape})?
|
||||
|
||||
/* special stuff for C strings */
|
||||
xdcqq \\\\
|
||||
xdcqdq \\\"
|
||||
|
@ -433,6 +449,13 @@ cppline {space}*#(.*\\{space})*.*{newline}
|
|||
BEGIN(xe);
|
||||
startlit();
|
||||
}
|
||||
<SQL>{xusstart} {
|
||||
token_start = yytext;
|
||||
state_before = YYSTATE;
|
||||
BEGIN(xus);
|
||||
startlit();
|
||||
addlit(yytext, yyleng);
|
||||
}
|
||||
<xq,xqc>{quotestop} |
|
||||
<xq,xqc>{quotefail} {
|
||||
yyless(1);
|
||||
|
@ -454,22 +477,28 @@ cppline {space}*#(.*\\{space})*.*{newline}
|
|||
yylval.str = mm_strdup(literalbuf);
|
||||
return NCONST;
|
||||
}
|
||||
<xq,xe,xn>{xqdouble} { addlitchar('\''); }
|
||||
<xus>{xusstop} {
|
||||
addlit(yytext, yyleng);
|
||||
BEGIN(state_before);
|
||||
yylval.str = mm_strdup(literalbuf);
|
||||
return UCONST;
|
||||
}
|
||||
<xq,xe,xn,xus>{xqdouble} { addlitchar('\''); }
|
||||
<xqc>{xqcquote} {
|
||||
addlitchar('\\');
|
||||
addlitchar('\'');
|
||||
}
|
||||
<xq,xqc,xn>{xqinside} { addlit(yytext, yyleng); }
|
||||
<xq,xqc,xn,xus>{xqinside} { addlit(yytext, yyleng); }
|
||||
<xe>{xeinside} { addlit(yytext, yyleng); }
|
||||
<xe>{xeescape} { addlit(yytext, yyleng); }
|
||||
<xe>{xeoctesc} { addlit(yytext, yyleng); }
|
||||
<xe>{xehexesc} { addlit(yytext, yyleng); }
|
||||
<xq,xqc,xe,xn>{quotecontinue} { /* ignore */ }
|
||||
<xq,xqc,xe,xn,xus>{quotecontinue} { /* ignore */ }
|
||||
<xe>. {
|
||||
/* This is only needed for \ just before EOF */
|
||||
addlitchar(yytext[0]);
|
||||
}
|
||||
<xq,xqc,xe,xn><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "unterminated quoted string"); }
|
||||
<xq,xqc,xe,xn,xus><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "unterminated quoted string"); }
|
||||
<SQL>{dolqfailed} {
|
||||
/* throw back all but the initial "$" */
|
||||
yyless(1);
|
||||
|
@ -515,6 +544,12 @@ cppline {space}*#(.*\\{space})*.*{newline}
|
|||
BEGIN(xd);
|
||||
startlit();
|
||||
}
|
||||
<SQL>{xuistart} {
|
||||
state_before = YYSTATE;
|
||||
BEGIN(xui);
|
||||
startlit();
|
||||
addlit(yytext, yyleng);
|
||||
}
|
||||
<xd>{xdstop} {
|
||||
BEGIN(state_before);
|
||||
if (literallen == 0)
|
||||
|
@ -528,9 +563,18 @@ cppline {space}*#(.*\\{space})*.*{newline}
|
|||
yylval.str = mm_strdup(literalbuf);
|
||||
return CSTRING;
|
||||
}
|
||||
<xd>{xddouble} { addlitchar('"'); }
|
||||
<xd>{xdinside} { addlit(yytext, yyleng); }
|
||||
<xd,xdc><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "unterminated quoted identifier"); }
|
||||
<xui>{xuistop}	{
					/*
					 * End of a U&"..." identifier, with or without a
					 * trailing UESCAPE clause.  The text is passed on
					 * verbatim, escapes included, as a UIDENT token.
					 */
					BEGIN(state_before);
					/*
					 * The {xuistart} rule calls startlit() and then adds
					 * its own three-character match (U&") to the literal
					 * buffer, so an empty identifier leaves exactly 3
					 * bytes there, not 2.
					 */
					if (literallen == 3)
						mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
					/* The backend will truncate the identifier here. We do not as it does not change the result. */
					addlit(yytext, yyleng);
					yylval.str = mm_strdup(literalbuf);
					return UIDENT;
				}
|
||||
<xd,xui>{xddouble} { addlitchar('"'); }
|
||||
<xd,xui>{xdinside} { addlit(yytext, yyleng); }
|
||||
<xd,xdc,xui><<EOF>> { mmerror(PARSE_ERROR, ET_FATAL, "unterminated quoted identifier"); }
|
||||
<C,SQL>{xdstart} {
|
||||
state_before = YYSTATE;
|
||||
BEGIN(xdc);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/preproc.y,v 1.379 2008/10/28 14:09:45 petere Exp $ */
|
||||
/* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/preproc.y,v 1.380 2008/10/29 08:04:53 petere Exp $ */
|
||||
|
||||
/* Copyright comment */
|
||||
%{
|
||||
|
@ -509,7 +509,7 @@ add_typedef(char *name, char * dimension, char * length, enum ECPGttype type_enu
|
|||
|
||||
/* Special token types, not actually keywords - see the "lex" file */
|
||||
%token <str> IDENT SCONST Op CSTRING CVARIABLE CPP_LINE IP BCONST
|
||||
%token <str> XCONST DOLCONST ECONST NCONST
|
||||
%token <str> XCONST DOLCONST ECONST NCONST UCONST UIDENT
|
||||
%token <ival> ICONST PARAM
|
||||
%token <dval> FCONST
|
||||
|
||||
|
@ -4966,6 +4966,10 @@ Sconst: SCONST
|
|||
$$[strlen($1)+3]='\0';
|
||||
free($1);
|
||||
}
|
||||
| UCONST
|
||||
{
|
||||
$$ = $1;
|
||||
}
|
||||
| DOLCONST
|
||||
{
|
||||
$$ = $1;
|
||||
|
@ -7013,6 +7017,7 @@ cvariable: CVARIABLE
|
|||
;
|
||||
ident: IDENT { $$ = $1; }
|
||||
| CSTRING { $$ = make3_str(make_str("\""), $1, make_str("\"")); }
|
||||
| UIDENT { $$ = $1; }
|
||||
;
|
||||
|
||||
quoted_ident_stringvar: name
|
||||
|
|
|
@ -18,6 +18,7 @@ test: preproc/autoprep
|
|||
test: preproc/comment
|
||||
test: preproc/define
|
||||
test: preproc/init
|
||||
test: preproc/strings
|
||||
test: preproc/type
|
||||
test: preproc/variable
|
||||
test: preproc/whenever
|
||||
|
|
|
@ -18,6 +18,7 @@ test: preproc/autoprep
|
|||
test: preproc/comment
|
||||
test: preproc/define
|
||||
test: preproc/init
|
||||
test: preproc/strings
|
||||
test: preproc/type
|
||||
test: preproc/variable
|
||||
test: preproc/whenever
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
/* Processed by ecpg (regression mode) */
|
||||
/* These include files are added by the preprocessor */
|
||||
#include <ecpglib.h>
|
||||
#include <ecpgerrno.h>
|
||||
#include <sqlca.h>
|
||||
/* End of automatic include section */
|
||||
#define ECPGdebug(X,Y) ECPGdebug((X)+100,(Y))
|
||||
|
||||
#line 1 "strings.pgc"
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
#line 1 "regression.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#line 3 "strings.pgc"
|
||||
|
||||
|
||||
/* exec sql begin declare section */
|
||||
|
||||
|
||||
#line 6 "strings.pgc"
|
||||
char * s1 , * s2 , * s3 , * s4 , * s5 , * s6 ;
|
||||
/* exec sql end declare section */
|
||||
#line 7 "strings.pgc"
|
||||
|
||||
|
||||
int main(void)
|
||||
{
|
||||
ECPGdebug(1, stderr);
|
||||
|
||||
{ ECPGconnect(__LINE__, 0, "regress1" , NULL, NULL , NULL, 0); }
|
||||
#line 13 "strings.pgc"
|
||||
|
||||
|
||||
{ ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "select 'abcdef' , N'abcdef' as foo , E'abc\\bdef' as \"foo\" , U&'d\\0061t\\0061' as U&\"foo\" , U&'d!+000061t!+000061' uescape '!' , $foo$abc$def$foo$ ", ECPGt_EOIT,
|
||||
ECPGt_char,&(s1),(long)0,(long)1,(1)*sizeof(char),
|
||||
ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
|
||||
ECPGt_char,&(s2),(long)0,(long)1,(1)*sizeof(char),
|
||||
ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
|
||||
ECPGt_char,&(s3),(long)0,(long)1,(1)*sizeof(char),
|
||||
ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
|
||||
ECPGt_char,&(s4),(long)0,(long)1,(1)*sizeof(char),
|
||||
ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
|
||||
ECPGt_char,&(s5),(long)0,(long)1,(1)*sizeof(char),
|
||||
ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
|
||||
ECPGt_char,&(s6),(long)0,(long)1,(1)*sizeof(char),
|
||||
ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT);}
|
||||
#line 21 "strings.pgc"
|
||||
|
||||
|
||||
printf("%s %s %s %s %s %s\n", s1, s2, s3, s4, s5, s6);
|
||||
|
||||
{ ECPGdisconnect(__LINE__, "CURRENT");}
|
||||
#line 25 "strings.pgc"
|
||||
|
||||
exit (0);
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
[NO_PID]: ECPGdebug: set to 1
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ECPGconnect: opening database regress1 on <DEFAULT> port <DEFAULT>
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_execute on line 15: query: select 'abcdef' , N'abcdef' as foo , E'abc\bdef' as "foo" , U&'d\0061t\0061' as U&"foo" , U&'d!+000061t!+000061' uescape '!' , $foo$abc$def$foo$ ; with 0 parameter(s) on connection regress1
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_execute on line 15: using PQexec
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_execute on line 15: correctly got 1 tuples with 6 fields
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_get_data on line 15: RESULT: abcdef offset: -1; array: yes
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_get_data on line 15: RESULT: abcdef offset: -1; array: yes
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_get_data on line 15: RESULT: abcdef offset: -1; array: yes
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_get_data on line 15: RESULT: data offset: -1; array: yes
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_get_data on line 15: RESULT: data offset: -1; array: yes
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_get_data on line 15: RESULT: abc$def offset: -1; array: yes
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_finish: connection regress1 closed
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
|
@ -0,0 +1 @@
|
|||
abcdef abcdef abcdef data data abc$def
|
|
@ -9,6 +9,7 @@ TESTS = array_of_struct array_of_struct.c \
|
|||
comment comment.c \
|
||||
define define.c \
|
||||
init init.c \
|
||||
strings strings.c \
|
||||
type type.c \
|
||||
variable variable.c \
|
||||
whenever whenever.c
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
exec sql include ../regression;
|
||||
|
||||
exec sql begin declare section;
|
||||
char *s1, *s2, *s3, *s4, *s5, *s6;
|
||||
exec sql end declare section;
|
||||
|
||||
int main(void)
|
||||
{
|
||||
ECPGdebug(1, stderr);
|
||||
|
||||
exec sql connect to REGRESSDB1;
|
||||
|
||||
exec sql select 'abcdef',
|
||||
N'abcdef' AS foo,
|
||||
E'abc\bdef' AS "foo",
|
||||
U&'d\0061t\0061' AS U&"foo",
|
||||
U&'d!+000061t!+000061' uescape '!',
|
||||
$foo$abc$def$foo$
|
||||
into :s1, :s2, :s3, :s4, :s5, :s6;
|
||||
|
||||
printf("%s %s %s %s %s %s\n", s1, s2, s3, s4, s5, s6);
|
||||
|
||||
exec sql disconnect;
|
||||
exit (0);
|
||||
}
|
|
@ -21,6 +21,31 @@ SELECT 'first line'
|
|||
ERROR: syntax error at or near "' - third line'"
|
||||
LINE 3: ' - third line'
|
||||
^
|
||||
-- Unicode escapes
|
||||
SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
|
||||
data
|
||||
------
|
||||
data
|
||||
(1 row)
|
||||
|
||||
SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
|
||||
dat\+000061
|
||||
-------------
|
||||
dat\+000061
|
||||
(1 row)
|
||||
|
||||
SELECT U&'wrong: \061';
|
||||
ERROR: invalid Unicode escape value at or near "\061'"
|
||||
LINE 1: SELECT U&'wrong: \061';
|
||||
^
|
||||
SELECT U&'wrong: \+0061';
|
||||
ERROR: invalid Unicode escape value at or near "\+0061'"
|
||||
LINE 1: SELECT U&'wrong: \+0061';
|
||||
^
|
||||
SELECT U&'wrong: +0061' UESCAPE '+';
|
||||
ERROR: invalid Unicode escape character at or near "+'"
|
||||
LINE 1: SELECT U&'wrong: +0061' UESCAPE '+';
|
||||
^
|
||||
--
|
||||
-- test conversions between various string types
|
||||
-- E021-10 implicit casting among the character data types
|
||||
|
|
|
@ -16,6 +16,14 @@ SELECT 'first line'
|
|||
' - third line'
|
||||
AS "Illegal comment within continuation";
|
||||
|
||||
-- Unicode escapes
|
||||
SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
|
||||
SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
|
||||
|
||||
SELECT U&'wrong: \061';
|
||||
SELECT U&'wrong: \+0061';
|
||||
SELECT U&'wrong: +0061' UESCAPE '+';
|
||||
|
||||
--
|
||||
-- test conversions between various string types
|
||||
-- E021-10 implicit casting among the character data types
|
||||
|
|
Loading…
Reference in New Issue