Add current substring regular expression syntax
SQL:1999 had syntax SUBSTRING(text FROM pattern FOR escapechar) but this was replaced in SQL:2003 by the more clear SUBSTRING(text SIMILAR pattern ESCAPE escapechar) but this was never implemented in PostgreSQL. This patch adds that new syntax as an alternative in the parser, and updates documentation and tests to indicate that this is the preferred alternative now. Reviewed-by: Pavel Stehule <pavel.stehule@gmail.com> Reviewed-by: Vik Fearing <vik@postgresfriends.org> Reviewed-by: Fabien COELHO <coelho@cri.ensmp.fr> Discussion: https://www.postgresql.org/message-id/flat/a15db31c-d0f8-8ce0-9039-578a31758adb%402ndquadrant.com
This commit is contained in:
parent
aafefb4dcb
commit
78c887679d
@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
|
||||
SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
|
||||
t
|
||||
---
|
||||
t
|
||||
|
@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
|
||||
SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
|
||||
t
|
||||
---
|
||||
t
|
||||
|
@ -564,7 +564,7 @@ SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t;
|
||||
SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t;
|
||||
SELECT substring('Thomas'::citext from 2) = 'homas' AS t;
|
||||
SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
|
||||
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
|
||||
SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
|
||||
|
||||
SELECT trim(' trim '::citext) = 'trim' AS t;
|
||||
SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t;
|
||||
|
@ -2669,15 +2669,21 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
|
||||
|
||||
<row>
|
||||
<entry role="func_table_entry"><para role="func_signature">
|
||||
<function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>SIMILAR</literal> <parameter>pattern</parameter> <type>text</type> <literal>ESCAPE</literal> <parameter>escape</parameter> <type>text</type> )
|
||||
<returnvalue>text</returnvalue>
|
||||
</para>
|
||||
<para role="func_signature">
|
||||
<function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> <literal>FOR</literal> <parameter>escape</parameter> <type>text</type> )
|
||||
<returnvalue>text</returnvalue>
|
||||
</para>
|
||||
<para>
|
||||
Extracts substring matching <acronym>SQL</acronym> regular expression;
|
||||
see <xref linkend="functions-similarto-regexp"/>.
|
||||
see <xref linkend="functions-similarto-regexp"/>. The first form has
|
||||
been specified since SQL:2003; the second form was only in SQL:1999
|
||||
and should be considered obsolete.
|
||||
</para>
|
||||
<para>
|
||||
<literal>substring('Thomas' from '%#"o_a#"_' for '#')</literal>
|
||||
<literal>substring('Thomas' similar '%#"o_a#"_' escape '#')</literal>
|
||||
<returnvalue>oma</returnvalue>
|
||||
</para></entry>
|
||||
</row>
|
||||
@ -5160,7 +5166,11 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
|
||||
The <function>substring</function> function with three parameters
|
||||
provides extraction of a substring that matches an SQL
|
||||
regular expression pattern. The function can be written according
|
||||
to SQL99 syntax:
|
||||
to standard SQL syntax:
|
||||
<synopsis>
|
||||
substring(<replaceable>string</replaceable> similar <replaceable>pattern</replaceable> escape <replaceable>escape-character</replaceable>)
|
||||
</synopsis>
|
||||
or using the now obsolete SQL:1999 syntax:
|
||||
<synopsis>
|
||||
substring(<replaceable>string</replaceable> from <replaceable>pattern</replaceable> for <replaceable>escape-character</replaceable>)
|
||||
</synopsis>
|
||||
@ -5201,8 +5211,8 @@ substring(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>,
|
||||
<para>
|
||||
Some examples, with <literal>#"</literal> delimiting the return string:
|
||||
<programlisting>
|
||||
substring('foobar' from '%#"o_b#"%' for '#') <lineannotation>oob</lineannotation>
|
||||
substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotation>
|
||||
substring('foobar' similar '%#"o_b#"%' escape '#') <lineannotation>oob</lineannotation>
|
||||
substring('foobar' similar '#"o_b#"%' escape '#') <lineannotation>NULL</lineannotation>
|
||||
</programlisting>
|
||||
</para>
|
||||
</sect2>
|
||||
|
@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text
|
||||
AS
|
||||
$$SELECT
|
||||
CASE WHEN $1 IN (1186) /* interval */
|
||||
THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) from 'interval[()0-9]* #"%#"' for '#'))
|
||||
THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) similar 'interval[()0-9]* #"%#"' escape '#'))
|
||||
ELSE null
|
||||
END$$;
|
||||
|
||||
|
@ -14451,7 +14451,27 @@ position_list:
|
||||
| /*EMPTY*/ { $$ = NIL; }
|
||||
;
|
||||
|
||||
/* SUBSTRING() arguments */
|
||||
/*
|
||||
* SUBSTRING() arguments
|
||||
*
|
||||
* Note that SQL:1999 has both
|
||||
*
|
||||
* text FROM int FOR int
|
||||
*
|
||||
* and
|
||||
*
|
||||
* text FROM pattern FOR escape
|
||||
*
|
||||
* In the parser we map them both to a call to the substring() function and
|
||||
* rely on type resolution to pick the right one.
|
||||
*
|
||||
* In SQL:2003, the second variant was changed to
|
||||
*
|
||||
* text SIMILAR pattern ESCAPE escape
|
||||
*
|
||||
* We could in theory map that to a different function internally, but
|
||||
* since we still support the SQL:1999 version, we don't.
|
||||
*/
|
||||
substr_list:
|
||||
a_expr FROM a_expr FOR a_expr
|
||||
{
|
||||
@ -14483,6 +14503,10 @@ substr_list:
|
||||
makeTypeCast($3,
|
||||
SystemTypeName("int4"), -1));
|
||||
}
|
||||
| a_expr SIMILAR a_expr ESCAPE a_expr
|
||||
{
|
||||
$$ = list_make3($1, $3, $5);
|
||||
}
|
||||
/*
|
||||
* We also want to support generic substring functions that
|
||||
* accept the usual generic list of arguments.
|
||||
|
@ -397,6 +397,13 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
|
||||
(1 row)
|
||||
|
||||
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
|
||||
bcd
|
||||
-----
|
||||
bcd
|
||||
(1 row)
|
||||
|
||||
-- obsolete SQL99 syntax
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
|
||||
bcd
|
||||
-----
|
||||
@ -404,75 +411,75 @@ SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
|
||||
(1 row)
|
||||
|
||||
-- No match should return NULL
|
||||
SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
|
||||
True
|
||||
------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- Null inputs should return NULL
|
||||
SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
|
||||
True
|
||||
------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
|
||||
True
|
||||
------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
|
||||
True
|
||||
------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- The first and last parts should act non-greedy
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
|
||||
bcdef
|
||||
-------
|
||||
bcdef
|
||||
(1 row)
|
||||
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
|
||||
abcdefg
|
||||
---------
|
||||
abcdefg
|
||||
(1 row)
|
||||
|
||||
-- Vertical bar in any part affects only that part
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
|
||||
bcdef
|
||||
-------
|
||||
bcdef
|
||||
(1 row)
|
||||
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
|
||||
bcdef
|
||||
-------
|
||||
bcdef
|
||||
(1 row)
|
||||
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
|
||||
bcdef
|
||||
-------
|
||||
bcdef
|
||||
(1 row)
|
||||
|
||||
-- Can't have more than two part separators
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
|
||||
ERROR: SQL regular expression may not contain more than two escape-double-quote separators
|
||||
CONTEXT: SQL function "substring" statement 1
|
||||
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
|
||||
bcdefg
|
||||
--------
|
||||
bcdefg
|
||||
(1 row)
|
||||
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
|
||||
abcdefg
|
||||
---------
|
||||
abcdefg
|
||||
|
@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
|
||||
SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
|
||||
|
||||
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
|
||||
-- obsolete SQL99 syntax
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
|
||||
|
||||
-- No match should return NULL
|
||||
SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
|
||||
|
||||
-- Null inputs should return NULL
|
||||
SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
|
||||
SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
|
||||
SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
|
||||
|
||||
-- The first and last parts should act non-greedy
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
|
||||
|
||||
-- Vertical bar in any part affects only that part
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
|
||||
|
||||
-- Can't have more than two part separators
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
|
||||
|
||||
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
|
||||
SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
|
||||
SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
|
||||
|
||||
-- substring() with just two arguments is not allowed by SQL spec;
|
||||
-- we accept it, but we interpret the pattern as a POSIX regexp not SQL
|
||||
|
Loading…
x
Reference in New Issue
Block a user