Code and docs review for commit 3187d6de0e5a9e805b27c48437897e8c39071d45.
Fix up check for high-bit-set characters, which provoked "comparison is always true due to limited range of data type" warnings on some compilers, and was unlike the way we do it elsewhere anyway. Fix omission of "$" from the set of valid identifier continuation characters. Get rid of sanitize_text(), which was utterly inconsistent with any other error report anywhere in the system, and wasn't even well designed on its own terms (double-quoting the result string without escaping contained double quotes doesn't seem very well thought out). Fix up error messages, which didn't follow the message style guidelines very well, and were overly specific in situations where the actual mistake might not be what they said. Improve documentation. (I started out just intending to fix the compiler warning, but the more I looked at the patch the less I liked it.)
This commit is contained in:
parent
499a50571c
commit
d12e5bb79b
@ -1823,25 +1823,22 @@
|
||||
<indexterm>
|
||||
<primary>parse_ident</primary>
|
||||
</indexterm>
|
||||
<literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
|
||||
[ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
|
||||
<literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>
|
||||
[, <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
|
||||
</entry>
|
||||
<entry><type>text[]</type></entry>
|
||||
<entry>Split <parameter>qualified identifier</parameter> into array
|
||||
<parameter>parts</parameter>. When <parameter>strictmode</parameter> is
|
||||
false, extra characters after the identifier are ignored. This is useful
|
||||
for parsing identifiers for objects like functions and arrays that may
|
||||
have trailing characters. By default, extra characters after the last
|
||||
identifier are considered an error, but if the second parameter is false,
|
||||
then the characters after the last identifier are ignored. Note that this
|
||||
function does not truncate quoted identifiers. If you care about that
|
||||
you should cast the result of this function to name[]. Non-printable
|
||||
characters (like 0 to 31) are always displayed as hexadecimal codes,
|
||||
which can be different from PostgreSQL internal SQL identifiers
|
||||
processing, when the original escaped value is displayed.
|
||||
<entry>
|
||||
Split <parameter>qualified_identifier</parameter> into an array of
|
||||
identifiers, removing any quoting of individual identifiers. By
|
||||
default, extra characters after the last identifier are considered an
|
||||
error; but if the second parameter is <literal>false</>, then such
|
||||
extra characters are ignored. (This behavior is useful for parsing
|
||||
names for objects like functions.) Note that this function does not
|
||||
truncate over-length identifiers. If you want truncation you can cast
|
||||
the result to <type>name[]</>.
|
||||
</entry>
|
||||
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
|
||||
<entry><literal>"SomeSchema,sometable"</literal></entry>
|
||||
<entry><literal>{SomeSchema,sometable}</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
|
@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
|
||||
|
||||
|
||||
/*
|
||||
* This simple parser utility are compatible with lexer implementation,
|
||||
* used only in parse_ident function
|
||||
* Is character a valid identifier start?
|
||||
* Must match scan.l's {ident_start} character class.
|
||||
*/
|
||||
static bool
|
||||
is_ident_start(unsigned char c)
|
||||
{
|
||||
/* Underscores and ASCII letters are OK */
|
||||
if (c == '_')
|
||||
return true;
|
||||
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
|
||||
return true;
|
||||
|
||||
if (c >= 0200 && c <= 0377)
|
||||
/* Any high-bit-set character is OK (might be part of a multibyte char) */
|
||||
if (IS_HIGHBIT_SET(c))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Is character a valid identifier continuation?
|
||||
* Must match scan.l's {ident_cont} character class.
|
||||
*/
|
||||
static bool
|
||||
is_ident_cont(unsigned char c)
|
||||
{
|
||||
if (c >= '0' && c <= '9')
|
||||
/* Can be digit or dollar sign ... */
|
||||
if ((c >= '0' && c <= '9') || c == '$')
|
||||
return true;
|
||||
|
||||
/* ... or an identifier start character */
|
||||
return is_ident_start(c);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sanitize SQL string for using in error message.
|
||||
*/
|
||||
static char *
|
||||
sanitize_text(text *t)
|
||||
{
|
||||
int len = VARSIZE_ANY_EXHDR(t);
|
||||
const char *p = VARDATA_ANY(t);
|
||||
StringInfo dstr;
|
||||
|
||||
dstr = makeStringInfo();
|
||||
|
||||
appendStringInfoChar(dstr, '"');
|
||||
|
||||
while (len--)
|
||||
{
|
||||
switch (*p)
|
||||
{
|
||||
case '\b':
|
||||
appendStringInfoString(dstr, "\\b");
|
||||
break;
|
||||
case '\f':
|
||||
appendStringInfoString(dstr, "\\f");
|
||||
break;
|
||||
case '\n':
|
||||
appendStringInfoString(dstr, "\\n");
|
||||
break;
|
||||
case '\r':
|
||||
appendStringInfoString(dstr, "\\r");
|
||||
break;
|
||||
case '\t':
|
||||
appendStringInfoString(dstr, "\\t");
|
||||
break;
|
||||
case '\'':
|
||||
appendStringInfoString(dstr, "''");
|
||||
break;
|
||||
case '\\':
|
||||
appendStringInfoString(dstr, "\\\\");
|
||||
break;
|
||||
default:
|
||||
if ((unsigned char) *p < ' ')
|
||||
appendStringInfo(dstr, "\\u%04x", (int) *p);
|
||||
else
|
||||
appendStringInfoCharMacro(dstr, *p);
|
||||
break;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
|
||||
appendStringInfoChar(dstr, '"');
|
||||
|
||||
return dstr->data;
|
||||
}
|
||||
|
||||
/*
|
||||
* parse_ident - parse SQL composed identifier to separate identifiers.
|
||||
* parse_ident - parse a SQL qualified identifier into separate identifiers.
|
||||
* When strict mode is active (second parameter), then any chars after
|
||||
* last identifiers are disallowed.
|
||||
* the last identifier are disallowed.
|
||||
*/
|
||||
Datum
|
||||
parse_ident(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *qualname;
|
||||
char *qualname_str;
|
||||
bool strict;
|
||||
text *qualname = PG_GETARG_TEXT_PP(0);
|
||||
bool strict = PG_GETARG_BOOL(1);
|
||||
char *qualname_str = text_to_cstring(qualname);
|
||||
ArrayBuildState *astate = NULL;
|
||||
char *nextp;
|
||||
bool after_dot = false;
|
||||
ArrayBuildState *astate = NULL;
|
||||
|
||||
qualname = PG_GETARG_TEXT_PP(0);
|
||||
qualname_str = text_to_cstring(qualname);
|
||||
strict = PG_GETARG_BOOL(1);
|
||||
|
||||
/*
|
||||
* The code below scribbles on qualname_str in some cases, so we should
|
||||
* reconvert qualname if we need to show the original string in error
|
||||
* messages.
|
||||
*/
|
||||
nextp = qualname_str;
|
||||
|
||||
/* skip leading whitespace */
|
||||
@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS)
|
||||
|
||||
for (;;)
|
||||
{
|
||||
char *curname;
|
||||
char *endp;
|
||||
bool missing_ident;
|
||||
char *curname;
|
||||
bool missing_ident = true;
|
||||
|
||||
missing_ident = true;
|
||||
|
||||
if (*nextp == '\"')
|
||||
if (*nextp == '"')
|
||||
{
|
||||
char *endp;
|
||||
|
||||
curname = nextp + 1;
|
||||
for (;;)
|
||||
{
|
||||
endp = strchr(nextp + 1, '\"');
|
||||
endp = strchr(nextp + 1, '"');
|
||||
if (endp == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unclosed double quotes"),
|
||||
errdetail("string %s is not valid identifier",
|
||||
sanitize_text(qualname))));
|
||||
if (endp[1] != '\"')
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("string is not a valid identifier: \"%s\"",
|
||||
text_to_cstring(qualname)),
|
||||
errdetail("String has unclosed double quotes.")));
|
||||
if (endp[1] != '"')
|
||||
break;
|
||||
memmove(endp, endp + 1, strlen(endp));
|
||||
nextp = endp;
|
||||
@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS)
|
||||
nextp = endp + 1;
|
||||
*endp = '\0';
|
||||
|
||||
/* Show complete input string in this case. */
|
||||
if (endp - curname == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("identifier should not be empty: %s",
|
||||
sanitize_text(qualname))));
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("string is not a valid identifier: \"%s\"",
|
||||
text_to_cstring(qualname)),
|
||||
errdetail("Quoted identifier must not be empty.")));
|
||||
|
||||
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
|
||||
false, TEXTOID, CurrentMemoryContext);
|
||||
missing_ident = false;
|
||||
}
|
||||
else
|
||||
else if (is_ident_start((unsigned char) *nextp))
|
||||
{
|
||||
if (is_ident_start((unsigned char) *nextp))
|
||||
{
|
||||
char *downname;
|
||||
int len;
|
||||
text *part;
|
||||
char *downname;
|
||||
int len;
|
||||
text *part;
|
||||
|
||||
curname = nextp++;
|
||||
while (is_ident_cont((unsigned char) *nextp))
|
||||
nextp++;
|
||||
curname = nextp++;
|
||||
while (is_ident_cont((unsigned char) *nextp))
|
||||
nextp++;
|
||||
|
||||
len = nextp - curname;
|
||||
len = nextp - curname;
|
||||
|
||||
/*
|
||||
* Unlike name, we don't implicitly truncate identifiers. This
|
||||
* is useful for allowing the user to check for specific parts
|
||||
* of the identifier being too long. It's easy enough for the
|
||||
* user to get the truncated names by casting our output to
|
||||
* name[].
|
||||
*/
|
||||
downname = downcase_identifier(curname, len, false, false);
|
||||
part = cstring_to_text_with_len(downname, len);
|
||||
astate = accumArrayResult(astate, PointerGetDatum(part), false,
|
||||
TEXTOID, CurrentMemoryContext);
|
||||
missing_ident = false;
|
||||
}
|
||||
/*
|
||||
* We don't implicitly truncate identifiers. This is useful for
|
||||
* allowing the user to check for specific parts of the identifier
|
||||
* being too long. It's easy enough for the user to get the
|
||||
* truncated names by casting our output to name[].
|
||||
*/
|
||||
downname = downcase_identifier(curname, len, false, false);
|
||||
part = cstring_to_text_with_len(downname, len);
|
||||
astate = accumArrayResult(astate, PointerGetDatum(part), false,
|
||||
TEXTOID, CurrentMemoryContext);
|
||||
missing_ident = false;
|
||||
}
|
||||
|
||||
if (missing_ident)
|
||||
@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS)
|
||||
/* Different error messages based on where we failed. */
|
||||
if (*nextp == '.')
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("missing valid identifier before \".\" symbol: %s",
|
||||
sanitize_text(qualname))));
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("string is not a valid identifier: \"%s\"",
|
||||
text_to_cstring(qualname)),
|
||||
errdetail("No valid identifier before \".\" symbol.")));
|
||||
else if (after_dot)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("missing valid identifier after \".\" symbol: %s",
|
||||
sanitize_text(qualname))));
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("string is not a valid identifier: \"%s\"",
|
||||
text_to_cstring(qualname)),
|
||||
errdetail("No valid identifier after \".\" symbol.")));
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("missing valid identifier: %s",
|
||||
sanitize_text(qualname))));
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("string is not a valid identifier: \"%s\"",
|
||||
text_to_cstring(qualname))));
|
||||
}
|
||||
|
||||
while (isspace((unsigned char) *nextp))
|
||||
@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (strict)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("identifier contains disallowed characters: %s",
|
||||
sanitize_text(qualname))));
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("string is not a valid identifier: \"%s\"",
|
||||
text_to_cstring(qualname))));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo');
|
||||
(1 row)
|
||||
|
||||
SELECT parse_ident('foo.boo[]'); -- should fail
|
||||
ERROR: identifier contains disallowed characters: "foo.boo[]"
|
||||
ERROR: string is not a valid identifier: "foo.boo[]"
|
||||
SELECT parse_ident('foo.boo[]', strict => false); -- ok
|
||||
parse_ident
|
||||
-------------
|
||||
@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok
|
||||
|
||||
-- should fail
|
||||
SELECT parse_ident(' ');
|
||||
ERROR: missing valid identifier: " "
|
||||
ERROR: string is not a valid identifier: " "
|
||||
SELECT parse_ident(' .aaa');
|
||||
ERROR: missing valid identifier before "." symbol: " .aaa"
|
||||
ERROR: string is not a valid identifier: " .aaa"
|
||||
DETAIL: No valid identifier before "." symbol.
|
||||
SELECT parse_ident(' aaa . ');
|
||||
ERROR: missing valid identifier after "." symbol: " aaa . "
|
||||
ERROR: string is not a valid identifier: " aaa . "
|
||||
DETAIL: No valid identifier after "." symbol.
|
||||
SELECT parse_ident('aaa.a%b');
|
||||
ERROR: identifier contains disallowed characters: "aaa.a%b"
|
||||
ERROR: string is not a valid identifier: "aaa.a%b"
|
||||
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
|
||||
ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
|
||||
ERROR: string is not a valid identifier: "X
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
|
||||
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
|
||||
length | length
|
||||
--------+--------
|
||||
@ -179,14 +181,17 @@ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66)
|
||||
(1 row)
|
||||
|
||||
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
|
||||
ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
|
||||
ERROR: string is not a valid identifier: ""c".X XXXXXXXXXX"
|
||||
SELECT parse_ident('1020');
|
||||
ERROR: missing valid identifier: "1020"
|
||||
ERROR: string is not a valid identifier: "1020"
|
||||
SELECT parse_ident('10.20');
|
||||
ERROR: missing valid identifier: "10.20"
|
||||
ERROR: string is not a valid identifier: "10.20"
|
||||
SELECT parse_ident('.');
|
||||
ERROR: missing valid identifier before "." symbol: "."
|
||||
ERROR: string is not a valid identifier: "."
|
||||
DETAIL: No valid identifier before "." symbol.
|
||||
SELECT parse_ident('.1020');
|
||||
ERROR: missing valid identifier before "." symbol: ".1020"
|
||||
ERROR: string is not a valid identifier: ".1020"
|
||||
DETAIL: No valid identifier before "." symbol.
|
||||
SELECT parse_ident('xxx.1020');
|
||||
ERROR: missing valid identifier after "." symbol: "xxx.1020"
|
||||
ERROR: string is not a valid identifier: "xxx.1020"
|
||||
DETAIL: No valid identifier after "." symbol.
|
||||
|
Loading…
x
Reference in New Issue
Block a user