diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index ae93e69f0d..e6c4ee52ee 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1823,25 +1823,22 @@
parse_ident
- parse_ident(str text,
- [ strictmode boolean DEFAULT true ] )
+ parse_ident(qualified_identifier text
+ [, strictmode boolean DEFAULT true ] )
text[]
- Split qualified identifier into array
- parts. When strictmode is
- false, extra characters after the identifier are ignored. This is useful
- for parsing identifiers for objects like functions and arrays that may
- have trailing characters. By default, extra characters after the last
- identifier are considered an error, but if the second parameter is false,
- then the characters after the last identifier are ignored. Note that this
- function does not truncate quoted identifiers. If you care about that
- you should cast the result of this function to name[]. Non-printable
- characters (like 0 to 31) are always displayed as hexadecimal codes,
- which can be different from PostgreSQL internal SQL identifiers
- processing, when the original escaped value is displayed.
+
+ Split qualified_identifier into an array of
+ identifiers, removing any quoting of individual identifiers. By
+ default, extra characters after the last identifier are considered an
+ error; but if the second parameter is false, then such
+ extra characters are ignored. (This behavior is useful for parsing
+ names for objects like functions.) Note that this function does not
+ truncate over-length identifiers. If you want truncation you can cast
+ the result to name[].
parse_ident('"SomeSchema".someTable')
- "SomeSchema,sometable"
+ {SomeSchema,sometable}
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index faa8ef3c91..6f7c407816 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
/*
- * This simple parser utility are compatible with lexer implementation,
- * used only in parse_ident function
+ * Is character a valid identifier start?
+ * Must match scan.l's {ident_start} character class.
*/
static bool
is_ident_start(unsigned char c)
{
+ /* Underscores and ASCII letters are OK */
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
-
- if (c >= 0200 && c <= 0377)
+ /* Any high-bit-set character is OK (might be part of a multibyte char) */
+ if (IS_HIGHBIT_SET(c))
return true;
-
return false;
}
+/*
+ * Is character a valid identifier continuation?
+ * Must match scan.l's {ident_cont} character class.
+ */
static bool
is_ident_cont(unsigned char c)
{
- if (c >= '0' && c <= '9')
+ /* Can be digit or dollar sign ... */
+ if ((c >= '0' && c <= '9') || c == '$')
return true;
-
+ /* ... or an identifier start character */
return is_ident_start(c);
}
/*
- * Sanitize SQL string for using in error message.
- */
-static char *
-sanitize_text(text *t)
-{
- int len = VARSIZE_ANY_EXHDR(t);
- const char *p = VARDATA_ANY(t);
- StringInfo dstr;
-
- dstr = makeStringInfo();
-
- appendStringInfoChar(dstr, '"');
-
- while (len--)
- {
- switch (*p)
- {
- case '\b':
- appendStringInfoString(dstr, "\\b");
- break;
- case '\f':
- appendStringInfoString(dstr, "\\f");
- break;
- case '\n':
- appendStringInfoString(dstr, "\\n");
- break;
- case '\r':
- appendStringInfoString(dstr, "\\r");
- break;
- case '\t':
- appendStringInfoString(dstr, "\\t");
- break;
- case '\'':
- appendStringInfoString(dstr, "''");
- break;
- case '\\':
- appendStringInfoString(dstr, "\\\\");
- break;
- default:
- if ((unsigned char) *p < ' ')
- appendStringInfo(dstr, "\\u%04x", (int) *p);
- else
- appendStringInfoCharMacro(dstr, *p);
- break;
- }
- p++;
- }
-
- appendStringInfoChar(dstr, '"');
-
- return dstr->data;
-}
-
-/*
- * parse_ident - parse SQL composed identifier to separate identifiers.
+ * parse_ident - parse a SQL qualified identifier into separate identifiers.
* When strict mode is active (second parameter), then any chars after
- * last identifiers are disallowed.
+ * the last identifier are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
- text *qualname;
- char *qualname_str;
- bool strict;
+ text *qualname = PG_GETARG_TEXT_PP(0);
+ bool strict = PG_GETARG_BOOL(1);
+ char *qualname_str = text_to_cstring(qualname);
+ ArrayBuildState *astate = NULL;
char *nextp;
bool after_dot = false;
- ArrayBuildState *astate = NULL;
-
- qualname = PG_GETARG_TEXT_PP(0);
- qualname_str = text_to_cstring(qualname);
- strict = PG_GETARG_BOOL(1);
+ /*
+ * The code below scribbles on qualname_str in some cases, so we should
+ * reconvert qualname if we need to show the original string in error
+ * messages.
+ */
nextp = qualname_str;
/* skip leading whitespace */
@@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS)
for (;;)
{
- char *curname;
- char *endp;
- bool missing_ident;
+ char *curname;
+ bool missing_ident = true;
- missing_ident = true;
-
- if (*nextp == '\"')
+ if (*nextp == '"')
{
+ char *endp;
+
curname = nextp + 1;
for (;;)
{
- endp = strchr(nextp + 1, '\"');
+ endp = strchr(nextp + 1, '"');
if (endp == NULL)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unclosed double quotes"),
- errdetail("string %s is not valid identifier",
- sanitize_text(qualname))));
- if (endp[1] != '\"')
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("String has unclosed double quotes.")));
+ if (endp[1] != '"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
@@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS)
nextp = endp + 1;
*endp = '\0';
- /* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("identifier should not be empty: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("Quoted identifier must not be empty.")));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
- else
+ else if (is_ident_start((unsigned char) *nextp))
{
- if (is_ident_start((unsigned char) *nextp))
- {
- char *downname;
- int len;
- text *part;
+ char *downname;
+ int len;
+ text *part;
- curname = nextp++;
- while (is_ident_cont((unsigned char) *nextp))
- nextp++;
+ curname = nextp++;
+ while (is_ident_cont((unsigned char) *nextp))
+ nextp++;
- len = nextp - curname;
+ len = nextp - curname;
- /*
- * Unlike name, we don't implicitly truncate identifiers. This
- * is useful for allowing the user to check for specific parts
- * of the identifier being too long. It's easy enough for the
- * user to get the truncated names by casting our output to
- * name[].
- */
- downname = downcase_identifier(curname, len, false, false);
- part = cstring_to_text_with_len(downname, len);
- astate = accumArrayResult(astate, PointerGetDatum(part), false,
- TEXTOID, CurrentMemoryContext);
- missing_ident = false;
- }
+ /*
+ * We don't implicitly truncate identifiers. This is useful for
+ * allowing the user to check for specific parts of the identifier
+ * being too long. It's easy enough for the user to get the
+ * truncated names by casting our output to name[].
+ */
+ downname = downcase_identifier(curname, len, false, false);
+ part = cstring_to_text_with_len(downname, len);
+ astate = accumArrayResult(astate, PointerGetDatum(part), false,
+ TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
}
if (missing_ident)
@@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS)
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier before \".\" symbol: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier before \".\" symbol.")));
else if (after_dot)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier after \".\" symbol: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier after \".\" symbol.")));
else
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
}
while (isspace((unsigned char) *nextp))
@@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
{
if (strict)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("identifier contains disallowed characters: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
break;
}
}
diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out
index 56139d45ef..acc5ce6193 100644
--- a/src/test/regress/expected/name.out
+++ b/src/test/regress/expected/name.out
@@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo');
(1 row)
SELECT parse_ident('foo.boo[]'); -- should fail
-ERROR: identifier contains disallowed characters: "foo.boo[]"
+ERROR: string is not a valid identifier: "foo.boo[]"
SELECT parse_ident('foo.boo[]', strict => false); -- ok
parse_ident
-------------
@@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok
-- should fail
SELECT parse_ident(' ');
-ERROR: missing valid identifier: " "
+ERROR: string is not a valid identifier: " "
SELECT parse_ident(' .aaa');
-ERROR: missing valid identifier before "." symbol: " .aaa"
+ERROR: string is not a valid identifier: " .aaa"
+DETAIL: No valid identifier before "." symbol.
SELECT parse_ident(' aaa . ');
-ERROR: missing valid identifier after "." symbol: " aaa . "
+ERROR: string is not a valid identifier: " aaa . "
+DETAIL: No valid identifier after "." symbol.
SELECT parse_ident('aaa.a%b');
-ERROR: identifier contains disallowed characters: "aaa.a%b"
+ERROR: string is not a valid identifier: "aaa.a%b"
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
-ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ERROR: string is not a valid identifier: "X
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
length | length
--------+--------
@@ -179,14 +181,17 @@ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66)
(1 row)
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
-ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
+ERROR: string is not a valid identifier: ""c".X XXXXXXXXXX"
SELECT parse_ident('1020');
-ERROR: missing valid identifier: "1020"
+ERROR: string is not a valid identifier: "1020"
SELECT parse_ident('10.20');
-ERROR: missing valid identifier: "10.20"
+ERROR: string is not a valid identifier: "10.20"
SELECT parse_ident('.');
-ERROR: missing valid identifier before "." symbol: "."
+ERROR: string is not a valid identifier: "."
+DETAIL: No valid identifier before "." symbol.
SELECT parse_ident('.1020');
-ERROR: missing valid identifier before "." symbol: ".1020"
+ERROR: string is not a valid identifier: ".1020"
+DETAIL: No valid identifier before "." symbol.
SELECT parse_ident('xxx.1020');
-ERROR: missing valid identifier after "." symbol: "xxx.1020"
+ERROR: string is not a valid identifier: "xxx.1020"
+DETAIL: No valid identifier after "." symbol.