From 9f504eab49a621f3dacbc6db66e8208085c3a3e9 Mon Sep 17 00:00:00 2001 From: drh Date: Sat, 23 Feb 2008 21:55:39 +0000 Subject: [PATCH] Fix a bug in the LIKE optimizer that occurs when the last character before the wildcard is an upper-case 'Z'. Ticket #2959. (CVS 4807) FossilOrigin-Name: 610574b23b5e73b71be71df66e084c5bf37f6ccd --- manifest | 14 +++---- manifest.uuid | 2 +- src/where.c | 32 ++++++++++---- test/like.test | 110 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 141 insertions(+), 17 deletions(-) diff --git a/manifest b/manifest index 8e44d274af..fc1d79550a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Additional\stest\scases.\s(CVS\s4806) -D 2008-02-21T21:30:07 +C Fix\sa\sbug\sin\sthe\sLIKE\soptimizer\sthat\soccurs\swhen\sthe\slast\scharacter\nbefore\sthe\swildcard\sis\san\supper-case\s'Z'.\s\sTicket\s#2959.\s(CVS\s4807) +D 2008-02-23T21:55:40 F Makefile.arm-wince-mingw32ce-gcc ac5f7b2cef0cd850d6f755ba6ee4ab961b1fadf7 F Makefile.in bc2b5df3e3d0d4b801b824b7ef6dec43812b049b F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -181,7 +181,7 @@ F src/vdbeblob.c 63c750acc7b5012479f508c0e9627372a82cb65d F src/vdbefifo.c a30c237b2a3577e1415fb6e288cbb6b8ed1e5736 F src/vdbemem.c 2d87d65430519dfb2103498b76bf15e4038b12b0 F src/vtab.c dc8947c9c79780b19ee6d6bae4ea624a2a303353 -F src/where.c 7ff0ca021cc77086e367ed3b6308fbc6bc9e1a31 +F src/where.c f899a98f9823b965730d94b1c7c425f9ba914c12 F tclinstaller.tcl 4356d9d94d2b5ed5e68f9f0c80c4df3048dd7617 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/all.test d12210212bada2bde6d5aeb90969b86c1aa977d2 @@ -361,7 +361,7 @@ F test/join5.test 86675fc2919269aa923c84dd00ee4249b97990fe F test/journal1.test 36f2d1bb9bf03f790f43fbdb439e44c0657fab19 F test/lastinsert.test 474d519c68cb79d07ecae56a763aa7f322c72f51 F test/laststmtchanges.test 18ead86c8a87ade949a1d5658f6dc4bb111d1b02 -F test/like.test d2d5efc471db314f662a6f0cfa52c6ec2d4fed8b +F test/like.test 2a3ddbd5d91503f914eabae67a47c4196fe33a58 F test/limit.test ca61a9fc520f54470edb3a771167fe4b68abc247 F test/loadext.test 1911e7365a6d31d77ba00dd3a8a31b7f2111a670 F test/loadext2.test 95ca7e2cb03fd3e068de97c3a2fe58dbdfd769e1 @@ -621,7 +621,7 @@ F www/tclsqlite.tcl 8be95ee6dba05eabcd27a9d91331c803f2ce2130 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5 -P 4c9a7b42b797f42f2b1e2e8d6a260044bd22ef2d -R 15fbbd63bac94bf4cf3996d2a4103198 +P 74126bf4e605dc875adbf5a0ee83bf8112891e35 +R 61ff14acc658996aa4f2c96b0d9cdcfb U drh -Z 3ade4f046651b84bc599a69e7912090d +Z bc51f98ef19af3970b42d08ccd4bf327 diff --git a/manifest.uuid b/manifest.uuid index 4ae7915526..5ec44852f8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -74126bf4e605dc875adbf5a0ee83bf8112891e35 \ No newline at end of file +610574b23b5e73b71be71df66e084c5bf37f6ccd \ No newline at end of file diff --git a/src/where.c b/src/where.c index 50bcea2ddb..0c222204c5 100644 --- a/src/where.c +++ b/src/where.c @@ -16,7 +16,7 @@ ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** -** $Id: where.c,v 1.286 2008/01/23 12:52:41 drh Exp $ +** $Id: where.c,v 1.287 2008/02/23 21:55:40 drh Exp $ */ #include "sqliteInt.h" @@ -517,19 +517,22 @@ static int isLikeOrGlob( sqlite3 *db, /* The database */ Expr *pExpr, /* Test this expression */ int *pnPattern, /* Number of non-wildcard prefix characters */ - int *pisComplete /* True if the only wildcard is % in the last character */ + int *pisComplete, /* True if the only wildcard is % in the last character */ + int *pnoCase /* True if uppercase is equivalent to lowercase */ ){ const char *z; Expr *pRight, *pLeft; ExprList *pList; int c, cnt; - int noCase; char wc[3]; CollSeq *pColl; - if( !sqlite3IsLikeFunction(db, pExpr, &noCase, wc) ){ + if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){ return 0; } +#ifdef SQLITE_EBCDIC + if( *pnoCase ) return 0; +#endif pList = pExpr->pList; pRight = pList->a[0].pExpr; if( pRight->op!=TK_STRING ){ @@ -545,8 +548,8 @@ static int isLikeOrGlob( /* No collation is defined for the ROWID. Use the default. */ pColl = db->pDfltColl; } - if( (pColl->type!=SQLITE_COLL_BINARY || noCase) && - (pColl->type!=SQLITE_COLL_NOCASE || !noCase) ){ + if( (pColl->type!=SQLITE_COLL_BINARY || *pnoCase) && + (pColl->type!=SQLITE_COLL_NOCASE || !*pnoCase) ){ return 0; } sqlite3DequoteExpr(db, pRight); @@ -716,6 +719,7 @@ static void exprAnalyze( Bitmask prereqAll; int nPattern; int isComplete; + int noCase; int op; Parse *pParse = pWC->pParse; sqlite3 *db = pParse->db; @@ -886,8 +890,16 @@ or_not_possible: #ifndef SQLITE_OMIT_LIKE_OPTIMIZATION /* Add constraints to reduce the search space on a LIKE or GLOB ** operator. + ** + ** A like pattern of the form "x LIKE 'abc%'" is changed into constraints + ** + ** x>='abc' AND x<'abd' AND x LIKE 'abc%' + ** + ** The last character of the prefix "abc" is incremented to form the + ** termination condidtion "abd". This trick of incrementing the last + ** is not 255 and if the character set is not EBCDIC. */ - if( isLikeOrGlob(db, pExpr, &nPattern, &isComplete) ){ + if( isLikeOrGlob(db, pExpr, &nPattern, &isComplete, &noCase) ){ Expr *pLeft, *pRight; Expr *pStr1, *pStr2; Expr *pNewExpr1, *pNewExpr2; @@ -903,8 +915,12 @@ or_not_possible: } pStr2 = sqlite3ExprDup(db, pStr1); if( !db->mallocFailed ){ + u8 c, *pC; assert( pStr2->token.dyn ); - ++*(u8*)&pStr2->token.z[nPattern-1]; + pC = (u8*)&pStr2->token.z[nPattern-1]; + c = *pC; + if( noCase ) c = sqlite3UpperToLower[c]; + *pC = c + 1; } pNewExpr1 = sqlite3PExpr(pParse, TK_GE, sqlite3ExprDup(db,pLeft), pStr1, 0); idxNew1 = whereClauseInsert(pWC, pNewExpr1, TERM_VIRTUAL|TERM_DYNAMIC); diff --git a/test/like.test b/test/like.test index 236529858e..57f1086d91 100644 --- a/test/like.test +++ b/test/like.test @@ -13,7 +13,7 @@ # in particular the optimizations that occur to help those operators # run faster. # -# $Id: like.test,v 1.8 2008/01/23 12:52:41 drh Exp $ +# $Id: like.test,v 1.9 2008/02/23 21:55:40 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -381,6 +381,114 @@ do_test like-5.7 { do_test like-5.8 { set sqlite_like_count } 12 +do_test like-5.11 { + execsql {PRAGMA case_sensitive_like=off} + set sqlite_like_count 0 + queryplan { + SELECT x FROM t1 WHERE x LIKE 'ABC%' ORDER BY 1 + } +} {ABC {ABC abc xyz} abc abcd nosort {} i1} +do_test like-5.12 { + set sqlite_like_count +} 12 +do_test like-5.13 { + set sqlite_like_count 0 + queryplan { + SELECT x FROM t2 WHERE x LIKE 'ABC%' ORDER BY 1 + } +} {abc ABC {ABC abc xyz} abcd nosort {} i2} +do_test like-5.14 { + set sqlite_like_count +} 0 +do_test like-5.15 { + execsql { + PRAGMA case_sensitive_like=on; + } + set sqlite_like_count 0 + queryplan { + SELECT x FROM t2 WHERE x LIKE 'ABC%' ORDER BY 1 + } +} {ABC {ABC abc xyz} nosort {} i2} +do_test like-5.16 { + set sqlite_like_count +} 12 +do_test like-5.17 { + execsql { + PRAGMA case_sensitive_like=off; + } + set sqlite_like_count 0 + queryplan { + SELECT x FROM t2 WHERE x GLOB 'ABC*' ORDER BY 1 + } +} {ABC {ABC abc xyz} nosort {} i2} +do_test like-5.18 { + set sqlite_like_count +} 12 + +# Boundary case. The prefix for a LIKE comparison is rounded up +# when constructing the comparison. Example: "ab" becomes "ac". +# In other words, the last character is increased by one. +# +# Make sure this happens correctly when the last character is a +# "z" and we are doing case-insensitive comparisons. +# +# Ticket #2959 +# +do_test like-5.21 { + execsql { + PRAGMA case_sensitive_like=off; + INSERT INTO t2 VALUES('ZZ-upper-upper'); + INSERT INTO t2 VALUES('zZ-lower-upper'); + INSERT INTO t2 VALUES('Zz-upper-lower'); + INSERT INTO t2 VALUES('zz-lower-lower'); + } + queryplan { + SELECT x FROM t2 WHERE x LIKE 'zz%'; + } +} {zz-lower-lower zZ-lower-upper Zz-upper-lower ZZ-upper-upper nosort {} i2} +do_test like-5.22 { + queryplan { + SELECT x FROM t2 WHERE x LIKE 'zZ%'; + } +} {zz-lower-lower zZ-lower-upper Zz-upper-lower ZZ-upper-upper nosort {} i2} +do_test like-5.23 { + queryplan { + SELECT x FROM t2 WHERE x LIKE 'Zz%'; + } +} {zz-lower-lower zZ-lower-upper Zz-upper-lower ZZ-upper-upper nosort {} i2} +do_test like-5.24 { + queryplan { + SELECT x FROM t2 WHERE x LIKE 'ZZ%'; + } +} {zz-lower-lower zZ-lower-upper Zz-upper-lower ZZ-upper-upper nosort {} i2} +do_test like-5.25 { + queryplan { + PRAGMA case_sensitive_like=on; + CREATE TABLE t3(x); + CREATE INDEX i3 ON t3(x); + INSERT INTO t3 VALUES('ZZ-upper-upper'); + INSERT INTO t3 VALUES('zZ-lower-upper'); + INSERT INTO t3 VALUES('Zz-upper-lower'); + INSERT INTO t3 VALUES('zz-lower-lower'); + SELECT x FROM t3 WHERE x LIKE 'zz%'; + } +} {zz-lower-lower nosort {} i3} +do_test like-5.26 { + queryplan { + SELECT x FROM t3 WHERE x LIKE 'zZ%'; + } +} {zZ-lower-upper nosort {} i3} +do_test like-5.27 { + queryplan { + SELECT x FROM t3 WHERE x LIKE 'Zz%'; + } +} {Zz-upper-lower nosort {} i3} +do_test like-5.28 { + queryplan { + SELECT x FROM t3 WHERE x LIKE 'ZZ%'; + } +} {ZZ-upper-upper nosort {} i3} + # ticket #2407 #