diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index ae93e69f0d..e6c4ee52ee 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1823,25 +1823,22 @@ parse_ident - parse_ident(str text, - [ strictmode boolean DEFAULT true ] ) + parse_ident(qualified_identifier text + [, strictmode boolean DEFAULT true ] ) text[] - Split qualified identifier into array - parts. When strictmode is - false, extra characters after the identifier are ignored. This is useful - for parsing identifiers for objects like functions and arrays that may - have trailing characters. By default, extra characters after the last - identifier are considered an error, but if the second parameter is false, - then the characters after the last identifier are ignored. Note that this - function does not truncate quoted identifiers. If you care about that - you should cast the result of this function to name[]. Non-printable - characters (like 0 to 31) are always displayed as hexadecimal codes, - which can be different from PostgreSQL internal SQL identifiers - processing, when the original escaped value is displayed. + + Split qualified_identifier into an array of + identifiers, removing any quoting of individual identifiers. By + default, extra characters after the last identifier are considered an + error; but if the second parameter is false, then such + extra characters are ignored. (This behavior is useful for parsing + names for objects like functions.) Note that this function does not + truncate over-length identifiers. If you want truncation you can cast + the result to name[]. parse_ident('"SomeSchema".someTable') - "SomeSchema,sometable" + {SomeSchema,sometable} diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index faa8ef3c91..6f7c407816 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS) /* - * This simple parser utility are compatible with lexer implementation, - * used only in parse_ident function + * Is character a valid identifier start? + * Must match scan.l's {ident_start} character class. */ static bool is_ident_start(unsigned char c) { + /* Underscores and ASCII letters are OK */ if (c == '_') return true; if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) return true; - - if (c >= 0200 && c <= 0377) + /* Any high-bit-set character is OK (might be part of a multibyte char) */ + if (IS_HIGHBIT_SET(c)) return true; - return false; } +/* + * Is character a valid identifier continuation? + * Must match scan.l's {ident_cont} character class. + */ static bool is_ident_cont(unsigned char c) { - if (c >= '0' && c <= '9') + /* Can be digit or dollar sign ... */ + if ((c >= '0' && c <= '9') || c == '$') return true; - + /* ... or an identifier start character */ return is_ident_start(c); } /* - * Sanitize SQL string for using in error message. - */ -static char * -sanitize_text(text *t) -{ - int len = VARSIZE_ANY_EXHDR(t); - const char *p = VARDATA_ANY(t); - StringInfo dstr; - - dstr = makeStringInfo(); - - appendStringInfoChar(dstr, '"'); - - while (len--) - { - switch (*p) - { - case '\b': - appendStringInfoString(dstr, "\\b"); - break; - case '\f': - appendStringInfoString(dstr, "\\f"); - break; - case '\n': - appendStringInfoString(dstr, "\\n"); - break; - case '\r': - appendStringInfoString(dstr, "\\r"); - break; - case '\t': - appendStringInfoString(dstr, "\\t"); - break; - case '\'': - appendStringInfoString(dstr, "''"); - break; - case '\\': - appendStringInfoString(dstr, "\\\\"); - break; - default: - if ((unsigned char) *p < ' ') - appendStringInfo(dstr, "\\u%04x", (int) *p); - else - appendStringInfoCharMacro(dstr, *p); - break; - } - p++; - } - - appendStringInfoChar(dstr, '"'); - - return dstr->data; -} - -/* - * parse_ident - parse SQL composed identifier to separate identifiers. + * parse_ident - parse a SQL qualified identifier into separate identifiers. * When strict mode is active (second parameter), then any chars after - * last identifiers are disallowed. + * the last identifier are disallowed. */ Datum parse_ident(PG_FUNCTION_ARGS) { - text *qualname; - char *qualname_str; - bool strict; + text *qualname = PG_GETARG_TEXT_PP(0); + bool strict = PG_GETARG_BOOL(1); + char *qualname_str = text_to_cstring(qualname); + ArrayBuildState *astate = NULL; char *nextp; bool after_dot = false; - ArrayBuildState *astate = NULL; - - qualname = PG_GETARG_TEXT_PP(0); - qualname_str = text_to_cstring(qualname); - strict = PG_GETARG_BOOL(1); + /* + * The code below scribbles on qualname_str in some cases, so we should + * reconvert qualname if we need to show the original string in error + * messages. + */ nextp = qualname_str; /* skip leading whitespace */ @@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS) for (;;) { - char *curname; - char *endp; - bool missing_ident; + char *curname; + bool missing_ident = true; - missing_ident = true; - - if (*nextp == '\"') + if (*nextp == '"') { + char *endp; + curname = nextp + 1; for (;;) { - endp = strchr(nextp + 1, '\"'); + endp = strchr(nextp + 1, '"'); if (endp == NULL) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unclosed double quotes"), - errdetail("string %s is not valid identifier", - sanitize_text(qualname)))); - if (endp[1] != '\"') + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)), + errdetail("String has unclosed double quotes."))); + if (endp[1] != '"') break; memmove(endp, endp + 1, strlen(endp)); nextp = endp; @@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS) nextp = endp + 1; *endp = '\0'; - /* Show complete input string in this case. */ if (endp - curname == 0) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("identifier should not be empty: %s", - sanitize_text(qualname)))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)), + errdetail("Quoted identifier must not be empty."))); astate = accumArrayResult(astate, CStringGetTextDatum(curname), false, TEXTOID, CurrentMemoryContext); missing_ident = false; } - else + else if (is_ident_start((unsigned char) *nextp)) { - if (is_ident_start((unsigned char) *nextp)) - { - char *downname; - int len; - text *part; + char *downname; + int len; + text *part; - curname = nextp++; - while (is_ident_cont((unsigned char) *nextp)) - nextp++; + curname = nextp++; + while (is_ident_cont((unsigned char) *nextp)) + nextp++; - len = nextp - curname; + len = nextp - curname; - /* - * Unlike name, we don't implicitly truncate identifiers. This - * is useful for allowing the user to check for specific parts - * of the identifier being too long. It's easy enough for the - * user to get the truncated names by casting our output to - * name[]. - */ - downname = downcase_identifier(curname, len, false, false); - part = cstring_to_text_with_len(downname, len); - astate = accumArrayResult(astate, PointerGetDatum(part), false, - TEXTOID, CurrentMemoryContext); - missing_ident = false; - } + /* + * We don't implicitly truncate identifiers. This is useful for + * allowing the user to check for specific parts of the identifier + * being too long. It's easy enough for the user to get the + * truncated names by casting our output to name[]. + */ + downname = downcase_identifier(curname, len, false, false); + part = cstring_to_text_with_len(downname, len); + astate = accumArrayResult(astate, PointerGetDatum(part), false, + TEXTOID, CurrentMemoryContext); + missing_ident = false; } if (missing_ident) @@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS) /* Different error messages based on where we failed. */ if (*nextp == '.') ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("missing valid identifier before \".\" symbol: %s", - sanitize_text(qualname)))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)), + errdetail("No valid identifier before \".\" symbol."))); else if (after_dot) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("missing valid identifier after \".\" symbol: %s", - sanitize_text(qualname)))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)), + errdetail("No valid identifier after \".\" symbol."))); else ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("missing valid identifier: %s", - sanitize_text(qualname)))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)))); } while (isspace((unsigned char) *nextp)) @@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS) { if (strict) ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("identifier contains disallowed characters: %s", - sanitize_text(qualname)))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("string is not a valid identifier: \"%s\"", + text_to_cstring(qualname)))); break; } } diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out index 56139d45ef..acc5ce6193 100644 --- a/src/test/regress/expected/name.out +++ b/src/test/regress/expected/name.out @@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo'); (1 row) SELECT parse_ident('foo.boo[]'); -- should fail -ERROR: identifier contains disallowed characters: "foo.boo[]" +ERROR: string is not a valid identifier: "foo.boo[]" SELECT parse_ident('foo.boo[]', strict => false); -- ok parse_ident ------------- @@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok -- should fail SELECT parse_ident(' '); -ERROR: missing valid identifier: " " +ERROR: string is not a valid identifier: " " SELECT parse_ident(' .aaa'); -ERROR: missing valid identifier before "." symbol: " .aaa" +ERROR: string is not a valid identifier: " .aaa" +DETAIL: No valid identifier before "." symbol. SELECT parse_ident(' aaa . '); -ERROR: missing valid identifier after "." symbol: " aaa . " +ERROR: string is not a valid identifier: " aaa . " +DETAIL: No valid identifier after "." symbol. SELECT parse_ident('aaa.a%b'); -ERROR: identifier contains disallowed characters: "aaa.a%b" +ERROR: string is not a valid identifier: "aaa.a%b" SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'); -ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" +ERROR: string is not a valid identifier: "X XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ; length | length --------+-------- @@ -179,14 +181,17 @@ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) (1 row) SELECT parse_ident(E'"c".X XXXX\002XXXXXX'); -ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX" +ERROR: string is not a valid identifier: ""c".X XXXXXXXXXX" SELECT parse_ident('1020'); -ERROR: missing valid identifier: "1020" +ERROR: string is not a valid identifier: "1020" SELECT parse_ident('10.20'); -ERROR: missing valid identifier: "10.20" +ERROR: string is not a valid identifier: "10.20" SELECT parse_ident('.'); -ERROR: missing valid identifier before "." symbol: "." +ERROR: string is not a valid identifier: "." +DETAIL: No valid identifier before "." symbol. SELECT parse_ident('.1020'); -ERROR: missing valid identifier before "." symbol: ".1020" +ERROR: string is not a valid identifier: ".1020" +DETAIL: No valid identifier before "." symbol. SELECT parse_ident('xxx.1020'); -ERROR: missing valid identifier after "." symbol: "xxx.1020" +ERROR: string is not a valid identifier: "xxx.1020" +DETAIL: No valid identifier after "." symbol.