Still more performance enhancements to the LIKE and GLOB operators.

FossilOrigin-Name: 6c8924cacc2b875270770fed2cc3b1884f57a655
This commit is contained in:
drh 2014-09-25 11:08:57 +00:00
parent 88b3322f27
commit 9fdfdc893b
3 changed files with 65 additions and 47 deletions

View File

@ -1,5 +1,5 @@
C More\sperformance\soptimization\sfor\sthe\sLIKE\sand\sGLOB\soperators.
D 2014-09-25T03:51:37.139
C Still\smore\sperformance\senhancements\sto\sthe\sLIKE\sand\sGLOB\soperators.
D 2014-09-25T11:08:57.081
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in cf57f673d77606ab0f2d9627ca52a9ba1464146a
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -184,7 +184,7 @@ F src/delete.c fae81cc2eb14b75267d4f47d3cfc9ae02aae726f
F src/expr.c f32119248996680aa73c5c37bfdd42820804dc17
F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb
F src/fkey.c da985ae673efef2c712caef825a5d2edb087ead7
F src/func.c 610b18afde750686785cdad9196b9fb1b03dc9b3
F src/func.c 727a324e87a3392a47e44568b901d2fb96ba0ed4
F src/global.c 5110fa12e09729b84eee0191c984ec4008e21937
F src/hash.c 4263fbc955f26c2e8cdc0cf214bc42435aa4e4f5
F src/hash.h c8f3c31722cf3277d03713909761e152a5b81094
@ -1200,7 +1200,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P ef30e0352b3d4a29749cd0872c10e45a6649ec52
R 8a27395e5bbddced71b289eca8fe4771
P 5ab1023d6cfe31fa8a194804b8216058977ac973
R 3c45ffccf6b8b9761c0cc8bd190b6e11
U drh
Z c372796d929c11ef7d059e5ca2e0eb4a
Z 88a762d5e9ae73a0ed674e2385a2b544

View File

@ -1 +1 @@
5ab1023d6cfe31fa8a194804b8216058977ac973
6c8924cacc2b875270770fed2cc3b1884f57a655

View File

@ -585,7 +585,7 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
/*
** Compare two UTF-8 strings for equality where the first string can
** potentially be a "glob" expression. Return true (1) if they
** potentially be a "glob" or "like" expression. Return true (1) if they
** are the same and false (0) if they are different.
**
** Globbing rules:
@ -605,11 +605,18 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
** "[a-z]" matches any single lower-case letter. To match a '-', make
** it the last character in the list.
**
** Like matching rules:
**
** '%' Matches any sequence of zero or more characters
**
** '_' Matches any one character
**
** Ec Where E is the "esc" character and c is any other
** character, including '%', '_', and esc, match exactly c.
**
** The comments throughout this routine usually assume glob matching.
**
** This routine is usually quick, but can be N**2 in the worst case.
**
** Hints: to match '*' or '?', put them in "[]". Like this:
**
** abc[*]xyz Matches "abc*xyz" only
*/
static int patternCompare(
const u8 *zPattern, /* The glob pattern */
@ -617,13 +624,12 @@ static int patternCompare(
const struct compareInfo *pInfo, /* Information about how to do the compare */
u32 esc /* The escape character */
){
u32 c, c2;
int invert;
int seen;
u32 matchOne = pInfo->matchOne;
u32 matchAll = pInfo->matchAll;
u32 matchOther;
u8 noCase = pInfo->noCase;
u32 c, c2; /* Next pattern and input string chars */
u32 matchOne = pInfo->matchOne; /* "?" or "_" */
u32 matchAll = pInfo->matchAll; /* "*" or "%" */
u32 matchOther; /* "[" or the escape character */
u8 noCase = pInfo->noCase; /* True if uppercase==lowercase */
const u8 *zEscaped = 0; /* One past the last escaped input char */
/* The GLOB operator does not have an ESCAPE clause. And LIKE does not
** have the matchSet operator. So we either have to look for one or
@ -633,7 +639,10 @@ static int patternCompare(
matchOther = esc ? esc : pInfo->matchSet;
while( (c = sqlite3Utf8Read(&zPattern))!=0 ){
if( c==matchAll ){
if( c==matchAll ){ /* Match "*" */
/* Skip over multiple "*" characters in the pattern. If there
** are also "?" characters, skip those as well, but consume a
** single character of the input string for each "?" skipped */
while( (c=sqlite3Utf8Read(&zPattern)) == matchAll
|| c == matchOne ){
if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
@ -641,12 +650,14 @@ static int patternCompare(
}
}
if( c==0 ){
return 1;
return 1; /* "*" at the end of the pattern matches */
}else if( c==matchOther ){
if( esc ){
c = sqlite3Utf8Read(&zPattern);
if( c==0 ) return 0;
}else{
/* "[...]" immediately follows the "*". We have to do a slow
** recursive search in this case, but it is an unusual case. */
assert( matchOther<0x80 ); /* '[' is a single-byte character */
while( *zString
&& patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
@ -655,39 +666,45 @@ static int patternCompare(
return *zString!=0;
}
}
while( (c2 = sqlite3Utf8Read(&zString))!=0 ){
if( noCase && c<0x80 ){
GlobUpperToLower(c2);
GlobUpperToLowerAscii(c);
while( c2 != 0 && c2 != c ){
do{ c2 = *(zString++); }while( c2>0x7f );
GlobUpperToLowerAscii(c2);
}
/* At this point variable c contains the first character of the
** pattern string past the "*". Search in the input string for the
** first matching character and recursively continue the match from
** that point.
**
** For a case-insensitive search, set variable cx to be the same as
** c but in the other case and search the input string for either
** c or cx.
*/
if( c<=0x80 ){
u32 cx;
if( noCase ){
cx = sqlite3Toupper(c);
c = sqlite3Tolower(c);
}else{
while( c2 != 0 && c2 != c ){
c2 = sqlite3Utf8Read(&zString);
}
cx = c;
}
while( (c2 = *(zString++))!=0 ){
if( c2!=c && c2!=cx ) continue;
if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
}
}else{
while( (c2 = sqlite3Utf8Read(&zString))!=0 ){
if( c2!=c ) continue;
if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
}
if( c2==0 ) return 0;
if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
}
return 0;
}
if( c==matchOne ){
if( sqlite3Utf8Read(&zString)==0 ){
return 0;
}else{
continue;
}
}
if( c==matchOther ){
if( esc ){
c = sqlite3Utf8Read(&zPattern);
if( c==0 ) return 0;
zEscaped = zPattern;
}else{
u32 prior_c = 0;
seen = 0;
invert = 0;
int seen = 0;
int invert = 0;
c = sqlite3Utf8Read(&zString);
if( c==0 ) return 0;
c2 = sqlite3Utf8Read(&zPattern);
@ -720,10 +737,11 @@ static int patternCompare(
}
c2 = sqlite3Utf8Read(&zString);
if( c==c2 ) continue;
if( !noCase ) return 0;
GlobUpperToLower(c);
GlobUpperToLower(c2);
if( c!=c2 ) return 0;
if( noCase && c<0x80 && c2<0x80 && sqlite3Tolower(c)==sqlite3Tolower(c2) ){
continue;
}
if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue;
return 0;
}
return *zString==0;
}