Rewrite LIKE's %-followed-by-_ optimization so it really works (this time
for sure ;-)). It now also optimizes more cases, such as %_%_. Improve comments too. Per bug #5478. In passing, also rename the TCHAR macro to GETCHAR, because pgindent is messing with the formatting of the former (apparently it now thinks TCHAR is a typedef name). Back-patch to 8.3, where the bug was introduced.
This commit is contained in:
parent
813cd7cb1f
commit
b7d7df63c8
@ -14,12 +14,12 @@
|
|||||||
* NextChar
|
* NextChar
|
||||||
* MatchText - to name of function wanted
|
* MatchText - to name of function wanted
|
||||||
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
|
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
|
||||||
* MATCH_LOWER - define iff using to_lower on text chars
|
* MATCH_LOWER - define for case (4), using to_lower on single-byte chars
|
||||||
*
|
*
|
||||||
* Copyright (c) 1996-2009, PostgreSQL Global Development Group
|
* Copyright (c) 1996-2009, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.26 2009/06/11 14:49:03 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.26.2.1 2010/05/28 17:35:30 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -70,9 +70,9 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef MATCH_LOWER
|
#ifdef MATCH_LOWER
|
||||||
#define TCHAR(t) ((char) tolower((unsigned char) (t)))
|
#define GETCHAR(t) ((char) tolower((unsigned char) (t)))
|
||||||
#else
|
#else
|
||||||
#define TCHAR(t) (t)
|
#define GETCHAR(t) (t)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@ -101,76 +101,72 @@ MatchText(char *t, int tlen, char *p, int plen)
|
|||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
|
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
|
||||||
errmsg("LIKE pattern must not end with escape character")));
|
errmsg("LIKE pattern must not end with escape character")));
|
||||||
if (TCHAR (*p) != TCHAR (*t))
|
if (GETCHAR(*p) != GETCHAR(*t))
|
||||||
return LIKE_FALSE;
|
return LIKE_FALSE;
|
||||||
}
|
}
|
||||||
else if (*p == '%')
|
else if (*p == '%')
|
||||||
{
|
{
|
||||||
/*
|
char firstpat;
|
||||||
* % processing is essentially a search for a match for what
|
|
||||||
* follows the %, plus a recursive match of the remainder. We
|
|
||||||
* succeed if and only if both conditions are met.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* %% is the same as % according to the SQL standard */
|
/*
|
||||||
/* Advance past all %'s */
|
* % processing is essentially a search for a text position at
|
||||||
while (plen > 0 && *p == '%')
|
* which the remainder of the text matches the remainder of the
|
||||||
|
* pattern, using a recursive call to check each potential match.
|
||||||
|
*
|
||||||
|
* If there are wildcards immediately following the %, we can skip
|
||||||
|
* over them first, using the idea that any sequence of N _'s and
|
||||||
|
* one or more %'s is equivalent to N _'s and one % (ie, it will
|
||||||
|
* match any sequence of at least N text characters). In this
|
||||||
|
* way we will always run the recursive search loop using a
|
||||||
|
* pattern fragment that begins with a literal character-to-match,
|
||||||
|
* thereby not recursing more than we have to.
|
||||||
|
*/
|
||||||
NextByte(p, plen);
|
NextByte(p, plen);
|
||||||
/* Trailing percent matches everything. */
|
|
||||||
|
while (plen > 0)
|
||||||
|
{
|
||||||
|
if (*p == '%')
|
||||||
|
NextByte(p, plen);
|
||||||
|
else if (*p == '_')
|
||||||
|
{
|
||||||
|
/* If not enough text left to match the pattern, ABORT */
|
||||||
|
if (tlen <= 0)
|
||||||
|
return LIKE_ABORT;
|
||||||
|
NextChar(t, tlen);
|
||||||
|
NextByte(p, plen);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break; /* Reached a non-wildcard pattern char */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're at end of pattern, match: we have a trailing % which
|
||||||
|
* matches any remaining text string.
|
||||||
|
*/
|
||||||
if (plen <= 0)
|
if (plen <= 0)
|
||||||
return LIKE_TRUE;
|
return LIKE_TRUE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Otherwise, scan for a text position at which we can match the
|
* Otherwise, scan for a text position at which we can match the
|
||||||
* rest of the pattern.
|
* rest of the pattern. The first remaining pattern char is known
|
||||||
|
* to be a regular or escaped literal character, so we can compare
|
||||||
|
* the first pattern byte to each text byte to avoid recursing
|
||||||
|
* more than we have to. This fact also guarantees that we don't
|
||||||
|
* have to consider a match to the zero-length substring at the
|
||||||
|
* end of the text.
|
||||||
*/
|
*/
|
||||||
if (*p == '_')
|
|
||||||
{
|
|
||||||
/* %_ is the same as _% - avoid matching _ repeatedly */
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
NextChar(t, tlen);
|
|
||||||
NextByte(p, plen);
|
|
||||||
} while (tlen > 0 && plen > 0 && *p == '_');
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we are at the end of the pattern, succeed: % followed by
|
|
||||||
* n _'s matches any string of at least n characters, and we
|
|
||||||
* have now found there are at least n characters.
|
|
||||||
*/
|
|
||||||
if (plen <= 0)
|
|
||||||
return LIKE_TRUE;
|
|
||||||
|
|
||||||
/* Look for a place that matches the rest of the pattern */
|
|
||||||
while (tlen > 0)
|
|
||||||
{
|
|
||||||
int matched = MatchText(t, tlen, p, plen);
|
|
||||||
|
|
||||||
if (matched != LIKE_FALSE)
|
|
||||||
return matched; /* TRUE or ABORT */
|
|
||||||
|
|
||||||
NextChar(t, tlen);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
char firstpat = TCHAR (*p);
|
|
||||||
|
|
||||||
if (*p == '\\')
|
if (*p == '\\')
|
||||||
{
|
{
|
||||||
if (plen < 2)
|
if (plen < 2)
|
||||||
return LIKE_FALSE;
|
return LIKE_FALSE; /* XXX should throw error */
|
||||||
firstpat = TCHAR (p[1]);
|
firstpat = GETCHAR(p[1]);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
firstpat = GETCHAR(*p);
|
||||||
|
|
||||||
while (tlen > 0)
|
while (tlen > 0)
|
||||||
{
|
{
|
||||||
/*
|
if (GETCHAR(*t) == firstpat)
|
||||||
* Optimization to prevent most recursion: don't recurse
|
|
||||||
* unless first pattern byte matches first text byte.
|
|
||||||
*/
|
|
||||||
if (TCHAR (*t) == firstpat)
|
|
||||||
{
|
{
|
||||||
int matched = MatchText(t, tlen, p, plen);
|
int matched = MatchText(t, tlen, p, plen);
|
||||||
|
|
||||||
@ -180,7 +176,6 @@ MatchText(char *t, int tlen, char *p, int plen)
|
|||||||
|
|
||||||
NextChar(t, tlen);
|
NextChar(t, tlen);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* End of text with no match, so no point in trying later places
|
* End of text with no match, so no point in trying later places
|
||||||
@ -195,7 +190,7 @@ MatchText(char *t, int tlen, char *p, int plen)
|
|||||||
NextByte(p, plen);
|
NextByte(p, plen);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else if (TCHAR (*p) != TCHAR (*t))
|
else if (GETCHAR(*p) != GETCHAR(*t))
|
||||||
{
|
{
|
||||||
/* non-wildcard pattern char fails to match text char */
|
/* non-wildcard pattern char fails to match text char */
|
||||||
return LIKE_FALSE;
|
return LIKE_FALSE;
|
||||||
@ -220,11 +215,12 @@ MatchText(char *t, int tlen, char *p, int plen)
|
|||||||
if (tlen > 0)
|
if (tlen > 0)
|
||||||
return LIKE_FALSE; /* end of pattern, but not of text */
|
return LIKE_FALSE; /* end of pattern, but not of text */
|
||||||
|
|
||||||
/* End of text string. Do we have matching pattern remaining? */
|
/*
|
||||||
while (plen > 0 && *p == '%') /* allow multiple %'s at end of
|
* End of text, but perhaps not of pattern. Match iff the remaining
|
||||||
* pattern */
|
* pattern can match a zero-length string, ie, it's zero or more %'s.
|
||||||
|
*/
|
||||||
|
while (plen > 0 && *p == '%')
|
||||||
NextByte(p, plen);
|
NextByte(p, plen);
|
||||||
|
|
||||||
if (plen <= 0)
|
if (plen <= 0)
|
||||||
return LIKE_TRUE;
|
return LIKE_TRUE;
|
||||||
|
|
||||||
@ -348,7 +344,7 @@ do_like_escape(text *pat, text *esc)
|
|||||||
#undef do_like_escape
|
#undef do_like_escape
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#undef TCHAR
|
#undef GETCHAR
|
||||||
|
|
||||||
#ifdef MATCH_LOWER
|
#ifdef MATCH_LOWER
|
||||||
#undef MATCH_LOWER
|
#undef MATCH_LOWER
|
||||||
|
@ -903,7 +903,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
|
|||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
--
|
--
|
||||||
-- test %/_ combination cases, cf bug #4821
|
-- test %/_ combination cases, cf bugs #4821 and #5478
|
||||||
--
|
--
|
||||||
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
|
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
|
||||||
t | t | f
|
t | t | f
|
||||||
@ -929,6 +929,12 @@ SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
|
|||||||
t | t | f
|
t | t | f
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'jack' LIKE '%____%' AS t;
|
||||||
|
t
|
||||||
|
---
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
--
|
--
|
||||||
-- test implicit type conversion
|
-- test implicit type conversion
|
||||||
--
|
--
|
||||||
|
@ -280,7 +280,7 @@ SELECT 'Hawkeye' ILIKE 'h%' AS "true";
|
|||||||
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
|
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
|
||||||
|
|
||||||
--
|
--
|
||||||
-- test %/_ combination cases, cf bug #4821
|
-- test %/_ combination cases, cf bugs #4821 and #5478
|
||||||
--
|
--
|
||||||
|
|
||||||
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
|
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
|
||||||
@ -289,6 +289,8 @@ SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
|
|||||||
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
|
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
|
||||||
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
|
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
|
||||||
|
|
||||||
|
SELECT 'jack' LIKE '%____%' AS t;
|
||||||
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- test implicit type conversion
|
-- test implicit type conversion
|
||||||
|
Loading…
x
Reference in New Issue
Block a user