diff --git a/contrib/citext/expected/citext.out b/contrib/citext/expected/citext.out index 96800be9c0..ec99aaed5d 100644 --- a/contrib/citext/expected/citext.out +++ b/contrib/citext/expected/citext.out @@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t; t (1 row) -SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t; +SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t; t --- t diff --git a/contrib/citext/expected/citext_1.out b/contrib/citext/expected/citext_1.out index 33e3676d3c..75fd08b7cc 100644 --- a/contrib/citext/expected/citext_1.out +++ b/contrib/citext/expected/citext_1.out @@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t; t (1 row) -SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t; +SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t; t --- t diff --git a/contrib/citext/sql/citext.sql b/contrib/citext/sql/citext.sql index 261b73cfa6..10232f5a9f 100644 --- a/contrib/citext/sql/citext.sql +++ b/contrib/citext/sql/citext.sql @@ -564,7 +564,7 @@ SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t; SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t; SELECT substring('Thomas'::citext from 2) = 'homas' AS t; SELECT substring('Thomas'::citext from '...$') = 'mas' AS t; -SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t; +SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t; SELECT trim(' trim '::citext) = 'trim' AS t; SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t; diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 7119f0b2ca..f065856535 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -2669,15 +2669,21 @@ repeat('Pg', 4) PgPgPgPg + substring ( string text SIMILAR pattern text ESCAPE escape text ) + text + + substring ( string text FROM pattern text FOR escape text ) text Extracts substring matching SQL regular expression; - see . + see . The first form has + been specified since SQL:2003; the second form was only in SQL:1999 + and should be considered obsolete. - substring('Thomas' from '%#"o_a#"_' for '#') + substring('Thomas' similar '%#"o_a#"_' escape '#') oma @@ -5160,7 +5166,11 @@ cast(-44 as bit(12)) 111111010100 The substring function with three parameters provides extraction of a substring that matches an SQL regular expression pattern. The function can be written according - to SQL99 syntax: + to standard SQL syntax: + +substring(string similar pattern escape escape-character) + + or using the now obsolete SQL:1999 syntax: substring(string from pattern for escape-character) @@ -5201,8 +5211,8 @@ substring(string, pattern, Some examples, with #" delimiting the return string: -substring('foobar' from '%#"o_b#"%' for '#') oob -substring('foobar' from '#"o_b#"%' for '#') NULL +substring('foobar' similar '%#"o_b#"%' escape '#') oob +substring('foobar' similar '#"o_b#"%' escape '#') NULL diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql index 3e07fb107e..5ab47e7743 100644 --- a/src/backend/catalog/information_schema.sql +++ b/src/backend/catalog/information_schema.sql @@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text AS $$SELECT CASE WHEN $1 IN (1186) /* interval */ - THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) from 'interval[()0-9]* #"%#"' for '#')) + THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) similar 'interval[()0-9]* #"%#"' escape '#')) ELSE null END$$; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 1a843049f0..4ff35095b8 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -14451,7 +14451,27 @@ position_list: | /*EMPTY*/ { $$ = NIL; } ; -/* SUBSTRING() arguments */ +/* + * SUBSTRING() arguments + * + * Note that SQL:1999 has both + * + * text FROM int FOR int + * + * and + * + * text FROM pattern FOR escape + * + * In the parser we map them both to a call to the substring() function and + * rely on type resolution to pick the right one. + * + * In SQL:2003, the second variant was changed to + * + * text SIMILAR pattern ESCAPE escape + * + * We could in theory map that to a different function internally, but + * since we still support the SQL:1999 version, we don't. + */ substr_list: a_expr FROM a_expr FOR a_expr { @@ -14483,6 +14503,10 @@ substr_list: makeTypeCast($3, SystemTypeName("int4"), -1)); } + | a_expr SIMILAR a_expr ESCAPE a_expr + { + $$ = list_make3($1, $3, $5); + } /* * We also want to support generic substring functions that * accept the usual generic list of arguments. diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 6e98d183f6..8c034c9599 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -397,6 +397,13 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456"; (1 row) -- T581 regular expression substring (with SQL's bizarre regexp syntax) +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd"; + bcd +----- + bcd +(1 row) + +-- obsolete SQL99 syntax SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; bcd ----- @@ -404,75 +411,75 @@ SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; (1 row) -- No match should return NULL -SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True"; True ------ t (1 row) -- Null inputs should return NULL -SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True"; True ------ t (1 row) -SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True"; +SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True"; True ------ t (1 row) -SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True"; True ------ t (1 row) -- The first and last parts should act non-greedy -SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef"; bcdef ------- bcdef (1 row) -SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg"; abcdefg --------- abcdefg (1 row) -- Vertical bar in any part affects only that part -SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef"; bcdef ------- bcdef (1 row) -SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef"; bcdef ------- bcdef (1 row) -SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef"; bcdef ------- bcdef (1 row) -- Can't have more than two part separators -SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error"; ERROR: SQL regular expression may not contain more than two escape-double-quote separators CONTEXT: SQL function "substring" statement 1 -- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty -SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg"; bcdefg -------- bcdefg (1 row) -SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg"; abcdefg --------- abcdefg diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 3e89159a4f..14901a2692 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890"; SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456"; -- T581 regular expression substring (with SQL's bizarre regexp syntax) +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd"; +-- obsolete SQL99 syntax SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; -- No match should return NULL -SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True"; -- Null inputs should return NULL -SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True"; -SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True"; -SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True"; +SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True"; +SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True"; -- The first and last parts should act non-greedy -SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef"; -SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg"; -- Vertical bar in any part affects only that part -SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef"; -SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef"; -SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef"; -- Can't have more than two part separators -SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error"; -- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty -SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg"; -SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg"; +SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg"; -- substring() with just two arguments is not allowed by SQL spec; -- we accept it, but we interpret the pattern as a POSIX regexp not SQL