From 965a2a191acdc6fc1cbdd2736a178f4d900e3765 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 19 Mar 2008 02:40:37 +0000 Subject: [PATCH] Fix regexp substring matching (substring(string from pattern)) for the corner case where there is a match to the pattern overall but the user has specified a parenthesized subexpression and that subexpression hasn't got a match. An example is substring('foo' from 'foo(bar)?'). This should return NULL, since (bar) isn't matched, but it was mistakenly returning the whole-pattern match instead (ie, 'foo'). Per bug #4044 from Rui Martins. This has been broken since the beginning; patch in all supported versions. The old behavior was sufficiently inconsistent that it's impossible to believe anyone is depending on it. --- src/backend/utils/adt/regexp.c | 56 +++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 359fd33cee..4a96a1bb70 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.78 2008/01/01 19:45:52 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.79 2008/03/19 02:40:37 tgl Exp $ * * Alistair Crooks added the code for the regex caching * agc - cached the regular expressions used - there's a good chance @@ -576,8 +576,13 @@ textregexsubstr(PG_FUNCTION_ARGS) { text *s = PG_GETARG_TEXT_PP(0); text *p = PG_GETARG_TEXT_PP(1); - bool match; + regex_t *re; regmatch_t pmatch[2]; + int so, + eo; + + /* Compile RE */ + re = RE_compile_and_cache(p, regex_flavor); /* * We pass two regmatch_t structs to get info about the overall match and @@ -585,34 +590,37 @@ textregexsubstr(PG_FUNCTION_ARGS) * is a parenthesized subexpression, we return what it matched; else * return what the whole regexp matched. */ - match = RE_compile_and_execute(p, - VARDATA_ANY(s), - VARSIZE_ANY_EXHDR(s), - regex_flavor, - 2, pmatch); + if (!RE_execute(re, + VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s), + 2, pmatch)) + PG_RETURN_NULL(); /* definitely no match */ - /* match? then return the substring matching the pattern */ - if (match) + if (re->re_nsub > 0) { - int so, - eo; - + /* has parenthesized subexpressions, use the first one */ so = pmatch[1].rm_so; eo = pmatch[1].rm_eo; - if (so < 0 || eo < 0) - { - /* no parenthesized subexpression */ - so = pmatch[0].rm_so; - eo = pmatch[0].rm_eo; - } - - return DirectFunctionCall3(text_substr, - PointerGetDatum(s), - Int32GetDatum(so + 1), - Int32GetDatum(eo - so)); + } + else + { + /* no parenthesized subexpression, use whole match */ + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; } - PG_RETURN_NULL(); + /* + * It is possible to have a match to the whole pattern but no match + * for a subexpression; for example 'foo(bar)?' is considered to match + * 'foo' but there is no subexpression match. So this extra test for + * match failure is not redundant. + */ + if (so < 0 || eo < 0) + PG_RETURN_NULL(); + + return DirectFunctionCall3(text_substr, + PointerGetDatum(s), + Int32GetDatum(so + 1), + Int32GetDatum(eo - so)); } /*