diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 860ae11826..c5048a1998 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -6323,32 +6323,38 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo;
\d
- [[:digit:]]
+ matches any digit, like
+ [[:digit:]]
\s
- [[:space:]]
+ matches any whitespace character, like
+ [[:space:]]
\w
- [[:word:]]
+ matches any word character, like
+ [[:word:]]
\D
- [^[:digit:]]
+ matches any non-digit, like
+ [^[:digit:]]
\S
- [^[:space:]]
+ matches any non-whitespace character, like
+ [^[:space:]]
\W
- [^[:word:]]
+ matches any non-word character, like
+ [^[:word:]]
@@ -6813,14 +6819,20 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
If newline-sensitive matching is specified, .
and bracket expressions using ^
will never match the newline character
- (so that matches will never cross newlines unless the RE
- explicitly arranges it)
+ (so that matches will not cross lines unless the RE
+ explicitly includes a newline)
and ^ and $
will match the empty string after and before a newline
respectively, in addition to matching at beginning and end of string
respectively.
But the ARE escapes \A and \Z
continue to match beginning or end of string only.
+ Also, the character class shorthands \D
+ and \W will match a newline regardless of this mode.
+ (Before PostgreSQL 14, they did not match
+ newlines in newline-sensitive mode.
+ Write [^[:digit:]]
+ or [^[:word:]] to get the old behavior.)
diff --git a/src/backend/regex/re_syntax.n b/src/backend/regex/re_syntax.n
index 1afaa7cce7..93830fd100 100644
--- a/src/backend/regex/re_syntax.n
+++ b/src/backend/regex/re_syntax.n
@@ -804,7 +804,7 @@ and bracket expressions using
\fB^\fR
will never match the newline character
(so that matches will never cross newlines unless the RE
-explicitly arranges it)
+explicitly includes a newline)
and
\fB^\fR
and
@@ -817,6 +817,11 @@ ARE
and
\fB\eZ\fR
continue to match beginning or end of string \fIonly\fR.
+Also, the character class shorthands
+\fB\eD\fR
+and
+\fB\eW\fR
+will match a newline regardless of this mode.
.PP
If partial newline-sensitive matching is specified,
this affects \fB.\fR
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 7b77a29136..d3540fdd0f 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1407,10 +1407,6 @@ charclasscomplement(struct vars *v,
/* build arcs for char class; this may cause color splitting */
subcolorcvec(v, cv, cstate, cstate);
-
- /* in NLSTOP mode, ensure newline is not part of the result set */
- if (v->cflags & REG_NLSTOP)
- newarc(v->nfa, PLAIN, v->nlcolor, cstate, cstate);
NOERR();
/* clean up any subcolors in the arc set */
@@ -1612,6 +1608,8 @@ cbracket(struct vars *v,
NOERR();
bracket(v, left, right);
+
+ /* in NLSTOP mode, ensure newline is not part of the result set */
if (v->cflags & REG_NLSTOP)
newarc(v->nfa, PLAIN, v->nlcolor, left, right);
NOERR();
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out
index 92154b6d28..5d993f40c2 100644
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -2144,7 +2144,8 @@ select * from test_regex('\D+', E'abc\ndef345', 'nLP');
test_regex
-------------------------------
{0,REG_UNONPOSIX,REG_ULOCALE}
- {abc}
+ {"abc +
+ def"}
(2 rows)
select * from test_regex('[\D]+', E'abc\ndef345', 'LPE');
@@ -2159,7 +2160,8 @@ select * from test_regex('[\D]+', E'abc\ndef345', 'nLPE');
test_regex
----------------------------------------
{0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE}
- {abc}
+ {"abc +
+ def"}
(2 rows)
select * from test_regex('\w+', E'abc_012\ndef', 'LP');
@@ -2202,7 +2204,8 @@ select * from test_regex('\W+', E'***\n@@@___', 'nLP');
test_regex
-------------------------------
{0,REG_UNONPOSIX,REG_ULOCALE}
- {***}
+ {"*** +
+ @@@"}
(2 rows)
select * from test_regex('[\W]+', E'***\n@@@___', 'LPE');
@@ -2217,7 +2220,8 @@ select * from test_regex('[\W]+', E'***\n@@@___', 'nLPE');
test_regex
----------------------------------------
{0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE}
- {***}
+ {"*** +
+ @@@"}
(2 rows)
-- doing 13 "escapes"