Fix regexp substring matching (substring(string from pattern)) for the corner case where there is a match to the pattern overall but the user has specified a parenthesized subexpression and that subexpression has no match. An example is substring('foo' from 'foo(bar)?'). This should return NULL, since (bar) isn't matched, but it was mistakenly returning the whole-pattern match instead (i.e., 'foo'). Per bug #4044 from Rui Martins. This has been broken since the beginning; patch in all supported versions. The old behavior was sufficiently inconsistent that it's impossible to believe anyone is depending on it.
This commit is contained in:
parent
8436f9a036
commit
965a2a191a
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.78 2008/01/01 19:45:52 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.79 2008/03/19 02:40:37 tgl Exp $
|
||||||
*
|
*
|
||||||
* Alistair Crooks added the code for the regex caching
|
* Alistair Crooks added the code for the regex caching
|
||||||
* agc - cached the regular expressions used - there's a good chance
|
* agc - cached the regular expressions used - there's a good chance
|
||||||
@ -576,8 +576,13 @@ textregexsubstr(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
text *s = PG_GETARG_TEXT_PP(0);
|
text *s = PG_GETARG_TEXT_PP(0);
|
||||||
text *p = PG_GETARG_TEXT_PP(1);
|
text *p = PG_GETARG_TEXT_PP(1);
|
||||||
bool match;
|
regex_t *re;
|
||||||
regmatch_t pmatch[2];
|
regmatch_t pmatch[2];
|
||||||
|
int so,
|
||||||
|
eo;
|
||||||
|
|
||||||
|
/* Compile RE */
|
||||||
|
re = RE_compile_and_cache(p, regex_flavor);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We pass two regmatch_t structs to get info about the overall match and
|
* We pass two regmatch_t structs to get info about the overall match and
|
||||||
@ -585,34 +590,37 @@ textregexsubstr(PG_FUNCTION_ARGS)
|
|||||||
* is a parenthesized subexpression, we return what it matched; else
|
* is a parenthesized subexpression, we return what it matched; else
|
||||||
* return what the whole regexp matched.
|
* return what the whole regexp matched.
|
||||||
*/
|
*/
|
||||||
match = RE_compile_and_execute(p,
|
if (!RE_execute(re,
|
||||||
VARDATA_ANY(s),
|
VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s),
|
||||||
VARSIZE_ANY_EXHDR(s),
|
2, pmatch))
|
||||||
regex_flavor,
|
PG_RETURN_NULL(); /* definitely no match */
|
||||||
2, pmatch);
|
|
||||||
|
|
||||||
/* match? then return the substring matching the pattern */
|
if (re->re_nsub > 0)
|
||||||
if (match)
|
|
||||||
{
|
{
|
||||||
int so,
|
/* has parenthesized subexpressions, use the first one */
|
||||||
eo;
|
|
||||||
|
|
||||||
so = pmatch[1].rm_so;
|
so = pmatch[1].rm_so;
|
||||||
eo = pmatch[1].rm_eo;
|
eo = pmatch[1].rm_eo;
|
||||||
if (so < 0 || eo < 0)
|
}
|
||||||
{
|
else
|
||||||
/* no parenthesized subexpression */
|
{
|
||||||
so = pmatch[0].rm_so;
|
/* no parenthesized subexpression, use whole match */
|
||||||
eo = pmatch[0].rm_eo;
|
so = pmatch[0].rm_so;
|
||||||
}
|
eo = pmatch[0].rm_eo;
|
||||||
|
|
||||||
return DirectFunctionCall3(text_substr,
|
|
||||||
PointerGetDatum(s),
|
|
||||||
Int32GetDatum(so + 1),
|
|
||||||
Int32GetDatum(eo - so));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_RETURN_NULL();
|
/*
|
||||||
|
* It is possible to have a match to the whole pattern but no match
|
||||||
|
* for a subexpression; for example 'foo(bar)?' is considered to match
|
||||||
|
* 'foo' but there is no subexpression match. So this extra test for
|
||||||
|
* match failure is not redundant.
|
||||||
|
*/
|
||||||
|
if (so < 0 || eo < 0)
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
|
||||||
|
return DirectFunctionCall3(text_substr,
|
||||||
|
PointerGetDatum(s),
|
||||||
|
Int32GetDatum(so + 1),
|
||||||
|
Int32GetDatum(eo - so));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user