From 6f3439697e4025bc9209ae0256b1f2928ca9a073 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 1 Jul 2011 18:26:40 +0000 Subject: [PATCH] Improve use of indexes to optimize DISTINCT queries. FossilOrigin-Name: 6c202ea0247ff509f696eee3839975a88ed26cf2 --- manifest | 16 ++-- manifest.uuid | 2 +- src/where.c | 225 ++++++++++++++++++++++++++++++--------------- test/distinct.test | 48 ++++++++++ test/tester.tcl | 2 +- 5 files changed, 210 insertions(+), 83 deletions(-) diff --git a/manifest b/manifest index 7cf6c1516e..2ac880f628 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Improvements\sand\stests\sfor\sdetection\sof\sredundant\sDISTINCT\squalifiers. -D 2011-07-01T14:21:38.743 +C Improve\suse\sof\sindexes\sto\soptimize\sDISTINCT\squeries. +D 2011-07-01T18:26:40.769 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -250,7 +250,7 @@ F src/vtab.c 901791a47318c0562cd0c676a2c6ff1bc530e582 F src/wal.c 0c70ad7b1cac6005fa5e2cbefd23ee05e391c290 F src/wal.h 66b40bd91bc29a5be1c88ddd1f5ade8f3f48728a F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f -F src/where.c 207cf2c12b391cfdfae89ca3c8afe2adbf46f712 +F src/where.c 83cf8e8637826a311e598980bf5c920069847a77 F test/8_3_names.test b93687beebd17f6ebf812405a6833bae5d1f4199 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87 @@ -368,7 +368,7 @@ F test/descidx1.test b1353c1a15cfbee97b13a1dcedaf0fe78163ba6a F test/descidx2.test 9f1a0c83fd57f8667c82310ca21b30a350888b5d F test/descidx3.test fe720e8b37d59f4cef808b0bf4e1b391c2e56b6f F test/diskfull.test 0cede7ef9d8f415d9d3944005c76be7589bb5ebb -F test/distinct.test 17985484790bbb6d098af682f808f11940ca6375 +F test/distinct.test dbedaddb15c7a96729d39e5c3d7c2993ac905a1d F test/distinctagg.test 1a6ef9c87a58669438fc771450d7a72577417376 F test/e_createtable.test 4771686a586b6ae414f927c389b2c101cc05c028 F test/e_delete.test e2ae0d3fce5efd70fef99025e932afffc5616fab @@ -696,7 +696,7 @@ F test/tclsqlite.test 8c154101e704170c2be10f137a5499ac2c6da8d3 F test/tempdb.test 19d0f66e2e3eeffd68661a11c83ba5e6ace9128c F test/temptable.test 1a21a597055dcf6002b6f1ee867632dccd6e925c F test/temptrigger.test b0273db072ce5f37cf19140ceb1f0d524bbe9f05 -F test/tester.tcl 7d3d17914ca5397a82f801db733b81d4a82f50c3 +F test/tester.tcl aad52c7644513e5650f6613d371b0af3193c3885 F test/thread001.test a3e6a7254d1cb057836cb3145b60c10bf5b7e60f F test/thread002.test afd20095e6e845b405df4f2c920cb93301ca69db F test/thread003.test b824d4f52b870ae39fc5bae4d8070eca73085dca @@ -950,7 +950,7 @@ F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00 F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262 -P f7ba0219ef2f235543c258be736955d91ca5ecce -R 778c4d57ca4312ecfa96b1ac67177d07 +P 7337293c87fb563604dd6ad284f2d1e30c938b4c +R 0a65cc1234bf3efb15e20cfbd6948f45 U dan -Z 94d823140247a8802d583b432d46d049 +Z be26c18c91323ba4302e319914f9eafb diff --git a/manifest.uuid b/manifest.uuid index a582d766d2..f1ce564ac1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -7337293c87fb563604dd6ad284f2d1e30c938b4c \ No newline at end of file +6c202ea0247ff509f696eee3839975a88ed26cf2 \ No newline at end of file diff --git a/src/where.c b/src/where.c index 70e9722d8c..5cbf5833c6 100644 --- a/src/where.c +++ b/src/where.c @@ -1398,6 +1398,156 @@ static int referencesOtherTables( return 0; } +/* +** This function searches the expression list passed as the second argument +** for an expression of type TK_COLUMN that refers to the same column and +** uses the same collation sequence as the iCol'th column of index pIdx. +** Argument iBase is the cursor number used for the table that pIdx refers +** to. +** +** If such an expression is found, its index in pList->a[] is returned. If +** no expression is found, -1 is returned. +*/ +static int findIndexCol( + Parse *pParse, /* Parse context */ + ExprList *pList, /* Expression list to search */ + int iBase, /* Cursor for table associated with pIdx */ + Index *pIdx, /* Index to match column of */ + int iCol /* Column of index to match */ +){ + int i; + const char *zColl = pIdx->azColl[iCol]; + + for(i=0; inExpr; i++){ + Expr *p = pList->a[i].pExpr; + if( pIdx->aiColumn[iCol]==p->iColumn && iBase==p->iTable ){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, p); + if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){ + return i; + } + } + } + + return -1; +} + +/* +** This routine determines if pIdx can be used to assist in processing a +** DISTINCT qualifier. In other words, it tests whether or not using this +** index for the outer loop guarantees that rows with equal values for +** all expressions in the pDistinct list are delivered grouped together. +** +** For example, the query +** +** SELECT DISTINCT a, b, c FROM tbl WHERE a = ? +** +** can benefit from any index on columns "b" and "c". +*/ +static int isDistinctIndex( + Parse *pParse, /* Parsing context */ + WhereClause *pWC, /* The WHERE clause */ + Index *pIdx, /* The index being considered */ + int base, /* Cursor number for the table pIdx is on */ + ExprList *pDistinct, /* The DISTINCT expressions */ + int nEqCol /* Number of index columns with == */ +){ + Bitmask mask = 0; /* Mask of unaccounted for pDistinct exprs */ + int i; /* Iterator variable */ + + if( pIdx->zName==0 || pDistinct==0 || pDistinct->nExpr>=BMS ) return 0; + + /* Loop through all the expressions in the distinct list. If any of them + ** are not simple column references, return early. Otherwise, test if the + ** WHERE clause contains a "col=X" clause. If it does, the expression + ** can be ignored. If it does not, and the column does not belong to the + ** same table as index pIdx, return early. Finally, if there is no + ** matching "col=X" expression and the column is on the same table as pIdx, + ** set the corresponding bit in variable mask. + */ + for(i=0; inExpr; i++){ + WhereTerm *pTerm; + Expr *p = pDistinct->a[i].pExpr; + if( p->op!=TK_COLUMN ) return 0; + pTerm = findTerm(pWC, p->iTable, p->iColumn, ~(Bitmask)0, WO_EQ, 0); + if( pTerm ){ + Expr *pX = pTerm->pExpr; + CollSeq *p1 = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight); + CollSeq *p2 = sqlite3ExprCollSeq(pParse, p); + if( p1==p2 ) continue; + } + if( p->iTable!=base ) return 0; + mask |= (((Bitmask)1) << i); + } + + for(i=nEqCol; mask && inColumn; i++){ + int iExpr = findIndexCol(pParse, pDistinct, base, pIdx, i); + if( iExpr<0 ) break; + mask &= ~(((Bitmask)1) << iExpr); + } + + return (mask==0); +} + + +/* +** Return true if the DISTINCT expression-list passed as the third argument +** is redundant. A DISTINCT list is redundant if the database contains a +** UNIQUE index that guarantees that the result of the query will be distinct +** anyway. +*/ +static int isDistinctRedundant( + Parse *pParse, + SrcList *pTabList, + WhereClause *pWC, + ExprList *pDistinct +){ + Table *pTab; + Index *pIdx; + int i; + int iBase; + + /* If there is more than one table or sub-select in the FROM clause of + ** this query, then it will not be possible to show that the DISTINCT + ** clause is redundant. */ + if( pTabList->nSrc!=1 ) return 0; + iBase = pTabList->a[0].iCursor; + pTab = pTabList->a[0].pTab; + + /* If any of the expressions is an IPK column, then return true. */ + for(i=0; inExpr; i++){ + Expr *p = pDistinct->a[i].pExpr; + assert( p->op!=TK_COLUMN || p->iTable==iBase ); + if( p->op==TK_COLUMN && p->iColumn<0 ) return 1; + } + + /* Loop through all indices on the table, checking each to see if it makes + ** the DISTINCT qualifier redundant. It does so if: + ** + ** 1. The index is itself UNIQUE, and + ** + ** 2. All of the columns in the index are either part of the pDistinct + ** list, or else the WHERE clause contains a term of the form "col=X", + ** where X is a constant value. The collation sequences of the + ** comparison and select-list expressions must match those of the index. + */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->onError==OE_None ) continue; + for(i=0; inColumn; i++){ + int iCol = pIdx->aiColumn[i]; + if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) + && 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i) + ){ + break; + } + } + if( i==pIdx->nColumn ){ + /* This index implies that the DISTINCT qualifier is redundant. */ + return 1; + } + } + + return 0; +} /* ** This routine decides if pIdx can be used to satisfy the ORDER BY @@ -2910,9 +3060,7 @@ static void bestBtreeIndex( /* If there is a DISTINCT qualifier and this index will scan rows in ** order of the DISTINCT expressions, clear bDist and set the appropriate ** flags in wsFlags. */ - if( isSortingIndex( - pParse, pWC->pMaskSet, pProbe, iCur, pDistinct, nEq, wsFlags, 0) - ){ + if( isDistinctIndex(pParse, pWC, pProbe, iCur, pDistinct, nEq) ){ bDist = 0; wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT; } @@ -4288,75 +4436,6 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ } } -/* -** Return true if the DISTINCT expression-list passed as the third argument -** is redundant. A DISTINCT list is redundant if the database contains a -** UNIQUE index that guarantees that the result of the query will be distinct -** anyway. -*/ -static int whereDistinctRedundant( - Parse *pParse, - SrcList *pTabList, - WhereClause *pWC, - ExprList *pDistinct -){ - Table *pTab; - Index *pIdx; - int i; - int iBase; - - /* If there is more than one table or sub-select in the FROM clause of - ** this query, then it will not be possible to show that the DISTINCT - ** clause is redundant. */ - if( pTabList->nSrc!=1 ) return 0; - iBase = pTabList->a[0].iCursor; - pTab = pTabList->a[0].pTab; - - /* If any of the expressions is an IPK column, then return true. */ - for(i=0; inExpr; i++){ - Expr *p = pDistinct->a[i].pExpr; - assert( p->op!=TK_COLUMN || p->iTable==iBase ); - if( p->op==TK_COLUMN && p->iColumn<0 ) return 1; - } - - /* Loop through all indices on the table, checking each to see if it makes - ** the DISTINCT qualifier redundant. It does so if: - ** - ** 1. The index is itself UNIQUE, and - ** - ** 2. All of the columns in the index are either part of the pDistinct - ** list, or else the WHERE clause contains a term of the form "col=X", - ** where X is a constant value. The collation sequences of the - ** comparison and select-list expressions must match those of the index. - */ - for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - if( pIdx->onError==OE_None ) continue; - for(i=0; inColumn; i++){ - int iCol = pIdx->aiColumn[i]; - const char *zColl = pIdx->azColl[i]; - - if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ - int j; - for(j=0; jnExpr; j++){ - Expr *p = pDistinct->a[j].pExpr; - assert( p->op!=TK_COLUMN || p->iTable==iBase ); - if( p->op==TK_COLUMN && p->iColumn==iCol ){ - CollSeq *pColl = sqlite3ExprCollSeq(pParse, p); - if( pColl && 0==sqlite3StrICmp(zColl, pColl->zName) ) break; - } - } - if( j==pDistinct->nExpr ) break; - } - } - if( i==pIdx->nColumn ){ - /* This index implies that the DISTINCT qualifier is redundant. */ - return 1; - } - } - - return 0; -} - /* ** Generate the beginning of the loop used for WHERE clause processing. @@ -4583,7 +4662,7 @@ WhereInfo *sqlite3WhereBegin( ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT. */ - if( pDistinct && whereDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){ + if( pDistinct && isDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){ pDistinct = 0; pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; } diff --git a/test/distinct.test b/test/distinct.test index f855caa833..14252b23fe 100644 --- a/test/distinct.test +++ b/test/distinct.test @@ -41,6 +41,23 @@ proc do_distinct_not_noop_test {tn sql} { uplevel [list do_test $tn [list is_distinct_noop $sql] 0] } +proc do_temptables_test {tn sql temptables} { + uplevel [list do_test $tn [subst -novar { + set ret "" + db eval "EXPLAIN [set sql]" { + if {$opcode == "OpenEphemeral"} { + if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" } + if {$p5 == "10"} { + lappend ret hash + } else { + lappend ret btree + } + } + } + set ret + }] $temptables] +} + #------------------------------------------------------------------------- # The following tests - distinct-1.* - check that the planner correctly @@ -107,6 +124,37 @@ foreach {tn noop sql} { } } +#------------------------------------------------------------------------- +# The following tests - distinct-2.* - test cases where an index is +# used to deliver results in order of the DISTINCT expressions. +# +drop_all_tables +do_execsql_test 2.0 { + CREATE TABLE t1(a, b, c); + + CREATE INDEX i1 ON t1(a, b); + CREATE INDEX i2 ON t1(b COLLATE nocase, c COLLATE nocase); + + INSERT INTO t1 VALUES('a', 'b', 'c'); + INSERT INTO t1 VALUES('A', 'B', 'C'); + INSERT INTO t1 VALUES('a', 'b', 'c'); + INSERT INTO t1 VALUES('A', 'B', 'C'); +} + +foreach {tn sql temptables res} { + 1 "a, b FROM t1" {} {A B a b} + 2 "b, a FROM t1" {} {B A b a} + 3 "a, b, c FROM t1" {hash} {a b c A B C} + 4 "a, b, c FROM t1 ORDER BY a, b, c" {btree} {A B C a b c} + 5 "b FROM t1 WHERE a = 'a'" {} {b} + 6 "b FROM t1" {hash} {b B} + 7 "a FROM t1" {} {A a} + 8 "b COLLATE nocase FROM t1" {} {b} + 9 "b COLLATE nocase FROM t1 ORDER BY b COLLATE nocase" {} {B} +} { + do_execsql_test 2.$tn.1 "SELECT DISTINCT $sql" $res + do_temptables_test 2.$tn.2 "SELECT DISTINCT $sql" $temptables +} diff --git a/test/tester.tcl b/test/tester.tcl index 8624821a5e..fabb32af52 100644 --- a/test/tester.tcl +++ b/test/tester.tcl @@ -20,7 +20,7 @@ # Commands to manipulate the db and the file-system at a high level: # # copy_file FROM TO -# drop_all_table ?DB? +# drop_all_tables ?DB? # forcedelete FILENAME # # Test the capability of the SQLite version built into the interpreter to