Improve use of indexes to optimize DISTINCT queries.

FossilOrigin-Name: 6c202ea0247ff509f696eee3839975a88ed26cf2
This commit is contained in:
dan 2011-07-01 18:26:40 +00:00
parent 50118cdfdb
commit 6f3439697e
5 changed files with 210 additions and 83 deletions

View File

@ -1,5 +1,5 @@
C Improvements\sand\stests\sfor\sdetection\sof\sredundant\sDISTINCT\squalifiers.
D 2011-07-01T14:21:38.743
C Improve\suse\sof\sindexes\sto\soptimize\sDISTINCT\squeries.
D 2011-07-01T18:26:40.769
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -250,7 +250,7 @@ F src/vtab.c 901791a47318c0562cd0c676a2c6ff1bc530e582
F src/wal.c 0c70ad7b1cac6005fa5e2cbefd23ee05e391c290
F src/wal.h 66b40bd91bc29a5be1c88ddd1f5ade8f3f48728a
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
F src/where.c 207cf2c12b391cfdfae89ca3c8afe2adbf46f712
F src/where.c 83cf8e8637826a311e598980bf5c920069847a77
F test/8_3_names.test b93687beebd17f6ebf812405a6833bae5d1f4199
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
@ -368,7 +368,7 @@ F test/descidx1.test b1353c1a15cfbee97b13a1dcedaf0fe78163ba6a
F test/descidx2.test 9f1a0c83fd57f8667c82310ca21b30a350888b5d
F test/descidx3.test fe720e8b37d59f4cef808b0bf4e1b391c2e56b6f
F test/diskfull.test 0cede7ef9d8f415d9d3944005c76be7589bb5ebb
F test/distinct.test 17985484790bbb6d098af682f808f11940ca6375
F test/distinct.test dbedaddb15c7a96729d39e5c3d7c2993ac905a1d
F test/distinctagg.test 1a6ef9c87a58669438fc771450d7a72577417376
F test/e_createtable.test 4771686a586b6ae414f927c389b2c101cc05c028
F test/e_delete.test e2ae0d3fce5efd70fef99025e932afffc5616fab
@ -696,7 +696,7 @@ F test/tclsqlite.test 8c154101e704170c2be10f137a5499ac2c6da8d3
F test/tempdb.test 19d0f66e2e3eeffd68661a11c83ba5e6ace9128c
F test/temptable.test 1a21a597055dcf6002b6f1ee867632dccd6e925c
F test/temptrigger.test b0273db072ce5f37cf19140ceb1f0d524bbe9f05
F test/tester.tcl 7d3d17914ca5397a82f801db733b81d4a82f50c3
F test/tester.tcl aad52c7644513e5650f6613d371b0af3193c3885
F test/thread001.test a3e6a7254d1cb057836cb3145b60c10bf5b7e60f
F test/thread002.test afd20095e6e845b405df4f2c920cb93301ca69db
F test/thread003.test b824d4f52b870ae39fc5bae4d8070eca73085dca
@ -950,7 +950,7 @@ F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00
F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262
P f7ba0219ef2f235543c258be736955d91ca5ecce
R 778c4d57ca4312ecfa96b1ac67177d07
P 7337293c87fb563604dd6ad284f2d1e30c938b4c
R 0a65cc1234bf3efb15e20cfbd6948f45
U dan
Z 94d823140247a8802d583b432d46d049
Z be26c18c91323ba4302e319914f9eafb

View File

@ -1 +1 @@
7337293c87fb563604dd6ad284f2d1e30c938b4c
6c202ea0247ff509f696eee3839975a88ed26cf2

View File

@ -1398,6 +1398,156 @@ static int referencesOtherTables(
return 0;
}
/*
** This function searches the expression list passed as the second argument
** for an expression of type TK_COLUMN that refers to the same column and
** uses the same collation sequence as the iCol'th column of index pIdx.
** Argument iBase is the cursor number used for the table that pIdx refers
** to.
**
** If such an expression is found, its index in pList->a[] is returned. If
** no expression is found, -1 is returned.
*/
static int findIndexCol(
Parse *pParse, /* Parse context */
ExprList *pList, /* Expression list to search */
int iBase, /* Cursor for table associated with pIdx */
Index *pIdx, /* Index to match column of */
int iCol /* Column of index to match */
){
int i;
const char *zColl = pIdx->azColl[iCol];
for(i=0; i<pList->nExpr; i++){
Expr *p = pList->a[i].pExpr;
if( pIdx->aiColumn[iCol]==p->iColumn && iBase==p->iTable ){
CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){
return i;
}
}
}
return -1;
}
/*
** This routine determines if pIdx can be used to assist in processing a
** DISTINCT qualifier. In other words, it tests whether or not using this
** index for the outer loop guarantees that rows with equal values for
** all expressions in the pDistinct list are delivered grouped together.
**
** For example, the query
**
** SELECT DISTINCT a, b, c FROM tbl WHERE a = ?
**
** can benefit from any index on columns "b" and "c".
*/
static int isDistinctIndex(
Parse *pParse, /* Parsing context */
WhereClause *pWC, /* The WHERE clause */
Index *pIdx, /* The index being considered */
int base, /* Cursor number for the table pIdx is on */
ExprList *pDistinct, /* The DISTINCT expressions */
int nEqCol /* Number of index columns with == */
){
Bitmask mask = 0; /* Mask of unaccounted for pDistinct exprs */
int i; /* Iterator variable */
if( pIdx->zName==0 || pDistinct==0 || pDistinct->nExpr>=BMS ) return 0;
/* Loop through all the expressions in the distinct list. If any of them
** are not simple column references, return early. Otherwise, test if the
** WHERE clause contains a "col=X" clause. If it does, the expression
** can be ignored. If it does not, and the column does not belong to the
** same table as index pIdx, return early. Finally, if there is no
** matching "col=X" expression and the column is on the same table as pIdx,
** set the corresponding bit in variable mask.
*/
for(i=0; i<pDistinct->nExpr; i++){
WhereTerm *pTerm;
Expr *p = pDistinct->a[i].pExpr;
if( p->op!=TK_COLUMN ) return 0;
pTerm = findTerm(pWC, p->iTable, p->iColumn, ~(Bitmask)0, WO_EQ, 0);
if( pTerm ){
Expr *pX = pTerm->pExpr;
CollSeq *p1 = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight);
CollSeq *p2 = sqlite3ExprCollSeq(pParse, p);
if( p1==p2 ) continue;
}
if( p->iTable!=base ) return 0;
mask |= (((Bitmask)1) << i);
}
for(i=nEqCol; mask && i<pIdx->nColumn; i++){
int iExpr = findIndexCol(pParse, pDistinct, base, pIdx, i);
if( iExpr<0 ) break;
mask &= ~(((Bitmask)1) << iExpr);
}
return (mask==0);
}
/*
** Return true if the DISTINCT expression-list passed as the third argument
** is redundant. A DISTINCT list is redundant if the database contains a
** UNIQUE index that guarantees that the result of the query will be distinct
** anyway.
*/
static int isDistinctRedundant(
Parse *pParse,
SrcList *pTabList,
WhereClause *pWC,
ExprList *pDistinct
){
Table *pTab;
Index *pIdx;
int i;
int iBase;
/* If there is more than one table or sub-select in the FROM clause of
** this query, then it will not be possible to show that the DISTINCT
** clause is redundant. */
if( pTabList->nSrc!=1 ) return 0;
iBase = pTabList->a[0].iCursor;
pTab = pTabList->a[0].pTab;
/* If any of the expressions is an IPK column, then return true. */
for(i=0; i<pDistinct->nExpr; i++){
Expr *p = pDistinct->a[i].pExpr;
assert( p->op!=TK_COLUMN || p->iTable==iBase );
if( p->op==TK_COLUMN && p->iColumn<0 ) return 1;
}
/* Loop through all indices on the table, checking each to see if it makes
** the DISTINCT qualifier redundant. It does so if:
**
** 1. The index is itself UNIQUE, and
**
** 2. All of the columns in the index are either part of the pDistinct
** list, or else the WHERE clause contains a term of the form "col=X",
** where X is a constant value. The collation sequences of the
** comparison and select-list expressions must match those of the index.
*/
for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
if( pIdx->onError==OE_None ) continue;
for(i=0; i<pIdx->nColumn; i++){
int iCol = pIdx->aiColumn[i];
if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx)
&& 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i)
){
break;
}
}
if( i==pIdx->nColumn ){
/* This index implies that the DISTINCT qualifier is redundant. */
return 1;
}
}
return 0;
}
/*
** This routine decides if pIdx can be used to satisfy the ORDER BY
@ -2910,9 +3060,7 @@ static void bestBtreeIndex(
/* If there is a DISTINCT qualifier and this index will scan rows in
** order of the DISTINCT expressions, clear bDist and set the appropriate
** flags in wsFlags. */
if( isSortingIndex(
pParse, pWC->pMaskSet, pProbe, iCur, pDistinct, nEq, wsFlags, 0)
){
if( isDistinctIndex(pParse, pWC, pProbe, iCur, pDistinct, nEq) ){
bDist = 0;
wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT;
}
@ -4288,75 +4436,6 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){
}
}
/*
** Return true if the DISTINCT expression-list passed as the third argument
** is redundant. A DISTINCT list is redundant if the database contains a
** UNIQUE index that guarantees that the result of the query will be distinct
** anyway.
*/
static int whereDistinctRedundant(
Parse *pParse,
SrcList *pTabList,
WhereClause *pWC,
ExprList *pDistinct
){
Table *pTab;
Index *pIdx;
int i;
int iBase;
/* If there is more than one table or sub-select in the FROM clause of
** this query, then it will not be possible to show that the DISTINCT
** clause is redundant. */
if( pTabList->nSrc!=1 ) return 0;
iBase = pTabList->a[0].iCursor;
pTab = pTabList->a[0].pTab;
/* If any of the expressions is an IPK column, then return true. */
for(i=0; i<pDistinct->nExpr; i++){
Expr *p = pDistinct->a[i].pExpr;
assert( p->op!=TK_COLUMN || p->iTable==iBase );
if( p->op==TK_COLUMN && p->iColumn<0 ) return 1;
}
/* Loop through all indices on the table, checking each to see if it makes
** the DISTINCT qualifier redundant. It does so if:
**
** 1. The index is itself UNIQUE, and
**
** 2. All of the columns in the index are either part of the pDistinct
** list, or else the WHERE clause contains a term of the form "col=X",
** where X is a constant value. The collation sequences of the
** comparison and select-list expressions must match those of the index.
*/
for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
if( pIdx->onError==OE_None ) continue;
for(i=0; i<pIdx->nColumn; i++){
int iCol = pIdx->aiColumn[i];
const char *zColl = pIdx->azColl[i];
if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){
int j;
for(j=0; j<pDistinct->nExpr; j++){
Expr *p = pDistinct->a[j].pExpr;
assert( p->op!=TK_COLUMN || p->iTable==iBase );
if( p->op==TK_COLUMN && p->iColumn==iCol ){
CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
if( pColl && 0==sqlite3StrICmp(zColl, pColl->zName) ) break;
}
}
if( j==pDistinct->nExpr ) break;
}
}
if( i==pIdx->nColumn ){
/* This index implies that the DISTINCT qualifier is redundant. */
return 1;
}
}
return 0;
}
/*
** Generate the beginning of the loop used for WHERE clause processing.
@ -4583,7 +4662,7 @@ WhereInfo *sqlite3WhereBegin(
** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to
** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT.
*/
if( pDistinct && whereDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){
if( pDistinct && isDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){
pDistinct = 0;
pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
}

View File

@ -41,6 +41,23 @@ proc do_distinct_not_noop_test {tn sql} {
uplevel [list do_test $tn [list is_distinct_noop $sql] 0]
}
proc do_temptables_test {tn sql temptables} {
uplevel [list do_test $tn [subst -novar {
set ret ""
db eval "EXPLAIN [set sql]" {
if {$opcode == "OpenEphemeral"} {
if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" }
if {$p5 == "10"} {
lappend ret hash
} else {
lappend ret btree
}
}
}
set ret
}] $temptables]
}
#-------------------------------------------------------------------------
# The following tests - distinct-1.* - check that the planner correctly
@ -107,6 +124,37 @@ foreach {tn noop sql} {
}
}
#-------------------------------------------------------------------------
# The following tests - distinct-2.* - test cases where an index is
# used to deliver results in order of the DISTINCT expressions.
#
drop_all_tables
do_execsql_test 2.0 {
CREATE TABLE t1(a, b, c);
CREATE INDEX i1 ON t1(a, b);
CREATE INDEX i2 ON t1(b COLLATE nocase, c COLLATE nocase);
INSERT INTO t1 VALUES('a', 'b', 'c');
INSERT INTO t1 VALUES('A', 'B', 'C');
INSERT INTO t1 VALUES('a', 'b', 'c');
INSERT INTO t1 VALUES('A', 'B', 'C');
}
foreach {tn sql temptables res} {
1 "a, b FROM t1" {} {A B a b}
2 "b, a FROM t1" {} {B A b a}
3 "a, b, c FROM t1" {hash} {a b c A B C}
4 "a, b, c FROM t1 ORDER BY a, b, c" {btree} {A B C a b c}
5 "b FROM t1 WHERE a = 'a'" {} {b}
6 "b FROM t1" {hash} {b B}
7 "a FROM t1" {} {A a}
8 "b COLLATE nocase FROM t1" {} {b}
9 "b COLLATE nocase FROM t1 ORDER BY b COLLATE nocase" {} {B}
} {
do_execsql_test 2.$tn.1 "SELECT DISTINCT $sql" $res
do_temptables_test 2.$tn.2 "SELECT DISTINCT $sql" $temptables
}

View File

@ -20,7 +20,7 @@
# Commands to manipulate the db and the file-system at a high level:
#
# copy_file FROM TO
# drop_all_table ?DB?
# drop_all_tables ?DB?
# forcedelete FILENAME
#
# Test the capability of the SQLite version built into the interpreter to