diff --git a/contrib/citext/expected/citext.out b/contrib/citext/expected/citext.out
index 96800be9c0..ec99aaed5d 100644
--- a/contrib/citext/expected/citext.out
+++ b/contrib/citext/expected/citext.out
@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
t
(1 row)
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
t
---
t
diff --git a/contrib/citext/expected/citext_1.out b/contrib/citext/expected/citext_1.out
index 33e3676d3c..75fd08b7cc 100644
--- a/contrib/citext/expected/citext_1.out
+++ b/contrib/citext/expected/citext_1.out
@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
t
(1 row)
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
t
---
t
diff --git a/contrib/citext/sql/citext.sql b/contrib/citext/sql/citext.sql
index 261b73cfa6..10232f5a9f 100644
--- a/contrib/citext/sql/citext.sql
+++ b/contrib/citext/sql/citext.sql
@@ -564,7 +564,7 @@ SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t;
SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t;
SELECT substring('Thomas'::citext from 2) = 'homas' AS t;
SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
SELECT trim(' trim '::citext) = 'trim' AS t;
SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t;
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7119f0b2ca..f065856535 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2669,15 +2669,21 @@ repeat('Pg', 4) PgPgPgPg
+ substring ( string text SIMILAR pattern text ESCAPE escape text )
+ text
+
+
substring ( string text FROM pattern text FOR escape text )
text
Extracts substring matching SQL regular expression;
- see .
+ see . The first form has
+ been specified since SQL:2003; the second form was only in SQL:1999
+ and should be considered obsolete.
- substring('Thomas' from '%#"o_a#"_' for '#')
+ substring('Thomas' similar '%#"o_a#"_' escape '#')
oma
@@ -5160,7 +5166,11 @@ cast(-44 as bit(12)) 111111010100
The substring function with three parameters
provides extraction of a substring that matches an SQL
regular expression pattern. The function can be written according
- to SQL99 syntax:
+ to standard SQL syntax:
+
+substring(string similar pattern escape escape-character)
+
+ or using the now obsolete SQL:1999 syntax:
substring(string from pattern for escape-character)
@@ -5201,8 +5211,8 @@ substring(string, pattern,
Some examples, with #" delimiting the return string:
-substring('foobar' from '%#"o_b#"%' for '#') oob
-substring('foobar' from '#"o_b#"%' for '#') NULL
+substring('foobar' similar '%#"o_b#"%' escape '#') oob
+substring('foobar' similar '#"o_b#"%' escape '#') NULL
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index 3e07fb107e..5ab47e7743 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text
AS
$$SELECT
CASE WHEN $1 IN (1186) /* interval */
- THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) from 'interval[()0-9]* #"%#"' for '#'))
+ THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) similar 'interval[()0-9]* #"%#"' escape '#'))
ELSE null
END$$;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 1a843049f0..4ff35095b8 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -14451,7 +14451,27 @@ position_list:
| /*EMPTY*/ { $$ = NIL; }
;
-/* SUBSTRING() arguments */
+/*
+ * SUBSTRING() arguments
+ *
+ * Note that SQL:1999 has both
+ *
+ * text FROM int FOR int
+ *
+ * and
+ *
+ * text FROM pattern FOR escape
+ *
+ * In the parser we map them both to a call to the substring() function and
+ * rely on type resolution to pick the right one.
+ *
+ * In SQL:2003, the second variant was changed to
+ *
+ * text SIMILAR pattern ESCAPE escape
+ *
+ * We could in theory map that to a different function internally, but
+ * since we still support the SQL:1999 version, we don't.
+ */
substr_list:
a_expr FROM a_expr FOR a_expr
{
@@ -14483,6 +14503,10 @@ substr_list:
makeTypeCast($3,
SystemTypeName("int4"), -1));
}
+ | a_expr SIMILAR a_expr ESCAPE a_expr
+ {
+ $$ = list_make3($1, $3, $5);
+ }
/*
* We also want to support generic substring functions that
* accept the usual generic list of arguments.
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 6e98d183f6..8c034c9599 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -397,6 +397,13 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
(1 row)
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
+ bcd
+-----
+ bcd
+(1 row)
+
+-- obsolete SQL99 syntax
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
bcd
-----
@@ -404,75 +411,75 @@ SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
(1 row)
-- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-- Null inputs should return NULL
-SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
True
------
t
(1 row)
-SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-- The first and last parts should act non-greedy
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
abcdefg
---------
abcdefg
(1 row)
-- Vertical bar in any part affects only that part
-SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-- Can't have more than two part separators
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
ERROR: SQL regular expression may not contain more than two escape-double-quote separators
CONTEXT: SQL function "substring" statement 1
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
-SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
bcdefg
--------
bcdefg
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
abcdefg
---------
abcdefg
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 3e89159a4f..14901a2692 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
+-- obsolete SQL99 syntax
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
-- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
-- Null inputs should return NULL
-SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
-SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
-SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
+SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
-- The first and last parts should act non-greedy
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
-- Vertical bar in any part affects only that part
-SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
-- Can't have more than two part separators
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
-SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
-SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
-- substring() with just two arguments is not allowed by SQL spec;
-- we accept it, but we interpret the pattern as a POSIX regexp not SQL