diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 860ae11826..c5048a1998 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6323,32 +6323,38 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo; \d - [[:digit:]] + matches any digit, like + [[:digit:]] \s - [[:space:]] + matches any whitespace character, like + [[:space:]] \w - [[:word:]] + matches any word character, like + [[:word:]] \D - [^[:digit:]] + matches any non-digit, like + [^[:digit:]] \S - [^[:space:]] + matches any non-whitespace character, like + [^[:space:]] \W - [^[:word:]] + matches any non-word character, like + [^[:word:]] @@ -6813,14 +6819,20 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); If newline-sensitive matching is specified, . and bracket expressions using ^ will never match the newline character - (so that matches will never cross newlines unless the RE - explicitly arranges it) + (so that matches will not cross lines unless the RE + explicitly includes a newline) and ^ and $ will match the empty string after and before a newline respectively, in addition to matching at beginning and end of string respectively. But the ARE escapes \A and \Z continue to match beginning or end of string only. + Also, the character class shorthands \D + and \W will match a newline regardless of this mode. + (Before PostgreSQL 14, they did not match + newlines when in newline-sensitive mode. + Write [^[:digit:]] + or [^[:word:]] to get the old behavior.) diff --git a/src/backend/regex/re_syntax.n b/src/backend/regex/re_syntax.n index 1afaa7cce7..93830fd100 100644 --- a/src/backend/regex/re_syntax.n +++ b/src/backend/regex/re_syntax.n @@ -804,7 +804,7 @@ and bracket expressions using \fB^\fR will never match the newline character (so that matches will never cross newlines unless the RE -explicitly arranges it) +explicitly includes a newline) and \fB^\fR and @@ -817,6 +817,11 @@ ARE and \fB\eZ\fR continue to match beginning or end of string \fIonly\fR. +Also, the character class shorthands +\fB\eD\fR +and +\fB\eW\fR +will match a newline regardless of this mode. .PP If partial newline-sensitive matching is specified, this affects \fB.\fR diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 7b77a29136..d3540fdd0f 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -1407,10 +1407,6 @@ charclasscomplement(struct vars *v, /* build arcs for char class; this may cause color splitting */ subcolorcvec(v, cv, cstate, cstate); - - /* in NLSTOP mode, ensure newline is not part of the result set */ - if (v->cflags & REG_NLSTOP) - newarc(v->nfa, PLAIN, v->nlcolor, cstate, cstate); NOERR(); /* clean up any subcolors in the arc set */ @@ -1612,6 +1608,8 @@ cbracket(struct vars *v, NOERR(); bracket(v, left, right); + + /* in NLSTOP mode, ensure newline is not part of the result set */ if (v->cflags & REG_NLSTOP) newarc(v->nfa, PLAIN, v->nlcolor, left, right); NOERR(); diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 92154b6d28..5d993f40c2 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -2144,7 +2144,8 @@ select * from test_regex('\D+', E'abc\ndef345', 'nLP'); test_regex ------------------------------- {0,REG_UNONPOSIX,REG_ULOCALE} - {abc} + {"abc + + def"} (2 rows) select * from test_regex('[\D]+', E'abc\ndef345', 'LPE'); @@ -2159,7 +2160,8 @@ select * from test_regex('[\D]+', E'abc\ndef345', 'nLPE'); test_regex ---------------------------------------- {0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE} - {abc} + {"abc + + def"} (2 rows) select * from test_regex('\w+', E'abc_012\ndef', 'LP'); @@ -2202,7 +2204,8 @@ select * from test_regex('\W+', E'***\n@@@___', 'nLP'); test_regex ------------------------------- {0,REG_UNONPOSIX,REG_ULOCALE} - {***} + {"*** + + @@@"} (2 rows) select * from test_regex('[\W]+', E'***\n@@@___', 'LPE'); @@ -2217,7 +2220,8 @@ select * from test_regex('[\W]+', E'***\n@@@___', 'nLPE'); test_regex ---------------------------------------- {0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE} - {***} + {"*** + + @@@"} (2 rows) -- doing 13 "escapes"