mirror of https://github.com/sqlite/sqlite
Improve use of indexes to optimize DISTINCT queries.
FossilOrigin-Name: 6c202ea0247ff509f696eee3839975a88ed26cf2
This commit is contained in:
parent
50118cdfdb
commit
6f3439697e
16
manifest
16
manifest
|
@ -1,5 +1,5 @@
|
|||
C Improvements\sand\stests\sfor\sdetection\sof\sredundant\sDISTINCT\squalifiers.
|
||||
D 2011-07-01T14:21:38.743
|
||||
C Improve\suse\sof\sindexes\sto\soptimize\sDISTINCT\squeries.
|
||||
D 2011-07-01T18:26:40.769
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
|
@ -250,7 +250,7 @@ F src/vtab.c 901791a47318c0562cd0c676a2c6ff1bc530e582
|
|||
F src/wal.c 0c70ad7b1cac6005fa5e2cbefd23ee05e391c290
|
||||
F src/wal.h 66b40bd91bc29a5be1c88ddd1f5ade8f3f48728a
|
||||
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
|
||||
F src/where.c 207cf2c12b391cfdfae89ca3c8afe2adbf46f712
|
||||
F src/where.c 83cf8e8637826a311e598980bf5c920069847a77
|
||||
F test/8_3_names.test b93687beebd17f6ebf812405a6833bae5d1f4199
|
||||
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
|
||||
F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
|
||||
|
@ -368,7 +368,7 @@ F test/descidx1.test b1353c1a15cfbee97b13a1dcedaf0fe78163ba6a
|
|||
F test/descidx2.test 9f1a0c83fd57f8667c82310ca21b30a350888b5d
|
||||
F test/descidx3.test fe720e8b37d59f4cef808b0bf4e1b391c2e56b6f
|
||||
F test/diskfull.test 0cede7ef9d8f415d9d3944005c76be7589bb5ebb
|
||||
F test/distinct.test 17985484790bbb6d098af682f808f11940ca6375
|
||||
F test/distinct.test dbedaddb15c7a96729d39e5c3d7c2993ac905a1d
|
||||
F test/distinctagg.test 1a6ef9c87a58669438fc771450d7a72577417376
|
||||
F test/e_createtable.test 4771686a586b6ae414f927c389b2c101cc05c028
|
||||
F test/e_delete.test e2ae0d3fce5efd70fef99025e932afffc5616fab
|
||||
|
@ -696,7 +696,7 @@ F test/tclsqlite.test 8c154101e704170c2be10f137a5499ac2c6da8d3
|
|||
F test/tempdb.test 19d0f66e2e3eeffd68661a11c83ba5e6ace9128c
|
||||
F test/temptable.test 1a21a597055dcf6002b6f1ee867632dccd6e925c
|
||||
F test/temptrigger.test b0273db072ce5f37cf19140ceb1f0d524bbe9f05
|
||||
F test/tester.tcl 7d3d17914ca5397a82f801db733b81d4a82f50c3
|
||||
F test/tester.tcl aad52c7644513e5650f6613d371b0af3193c3885
|
||||
F test/thread001.test a3e6a7254d1cb057836cb3145b60c10bf5b7e60f
|
||||
F test/thread002.test afd20095e6e845b405df4f2c920cb93301ca69db
|
||||
F test/thread003.test b824d4f52b870ae39fc5bae4d8070eca73085dca
|
||||
|
@ -950,7 +950,7 @@ F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00
|
|||
F tool/tostr.awk 11760e1b94a5d3dcd42378f3cc18544c06cfa576
|
||||
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
||||
F tool/warnings.sh 2ebae31e1eb352696f3c2f7706a34c084b28c262
|
||||
P f7ba0219ef2f235543c258be736955d91ca5ecce
|
||||
R 778c4d57ca4312ecfa96b1ac67177d07
|
||||
P 7337293c87fb563604dd6ad284f2d1e30c938b4c
|
||||
R 0a65cc1234bf3efb15e20cfbd6948f45
|
||||
U dan
|
||||
Z 94d823140247a8802d583b432d46d049
|
||||
Z be26c18c91323ba4302e319914f9eafb
|
||||
|
|
|
@ -1 +1 @@
|
|||
7337293c87fb563604dd6ad284f2d1e30c938b4c
|
||||
6c202ea0247ff509f696eee3839975a88ed26cf2
|
225
src/where.c
225
src/where.c
|
@ -1398,6 +1398,156 @@ static int referencesOtherTables(
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function searches the expression list passed as the second argument
|
||||
** for an expression of type TK_COLUMN that refers to the same column and
|
||||
** uses the same collation sequence as the iCol'th column of index pIdx.
|
||||
** Argument iBase is the cursor number used for the table that pIdx refers
|
||||
** to.
|
||||
**
|
||||
** If such an expression is found, its index in pList->a[] is returned. If
|
||||
** no expression is found, -1 is returned.
|
||||
*/
|
||||
static int findIndexCol(
|
||||
Parse *pParse, /* Parse context */
|
||||
ExprList *pList, /* Expression list to search */
|
||||
int iBase, /* Cursor for table associated with pIdx */
|
||||
Index *pIdx, /* Index to match column of */
|
||||
int iCol /* Column of index to match */
|
||||
){
|
||||
int i;
|
||||
const char *zColl = pIdx->azColl[iCol];
|
||||
|
||||
for(i=0; i<pList->nExpr; i++){
|
||||
Expr *p = pList->a[i].pExpr;
|
||||
if( pIdx->aiColumn[iCol]==p->iColumn && iBase==p->iTable ){
|
||||
CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
|
||||
if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
** This routine determines if pIdx can be used to assist in processing a
|
||||
** DISTINCT qualifier. In other words, it tests whether or not using this
|
||||
** index for the outer loop guarantees that rows with equal values for
|
||||
** all expressions in the pDistinct list are delivered grouped together.
|
||||
**
|
||||
** For example, the query
|
||||
**
|
||||
** SELECT DISTINCT a, b, c FROM tbl WHERE a = ?
|
||||
**
|
||||
** can benefit from any index on columns "b" and "c".
|
||||
*/
|
||||
static int isDistinctIndex(
|
||||
Parse *pParse, /* Parsing context */
|
||||
WhereClause *pWC, /* The WHERE clause */
|
||||
Index *pIdx, /* The index being considered */
|
||||
int base, /* Cursor number for the table pIdx is on */
|
||||
ExprList *pDistinct, /* The DISTINCT expressions */
|
||||
int nEqCol /* Number of index columns with == */
|
||||
){
|
||||
Bitmask mask = 0; /* Mask of unaccounted for pDistinct exprs */
|
||||
int i; /* Iterator variable */
|
||||
|
||||
if( pIdx->zName==0 || pDistinct==0 || pDistinct->nExpr>=BMS ) return 0;
|
||||
|
||||
/* Loop through all the expressions in the distinct list. If any of them
|
||||
** are not simple column references, return early. Otherwise, test if the
|
||||
** WHERE clause contains a "col=X" clause. If it does, the expression
|
||||
** can be ignored. If it does not, and the column does not belong to the
|
||||
** same table as index pIdx, return early. Finally, if there is no
|
||||
** matching "col=X" expression and the column is on the same table as pIdx,
|
||||
** set the corresponding bit in variable mask.
|
||||
*/
|
||||
for(i=0; i<pDistinct->nExpr; i++){
|
||||
WhereTerm *pTerm;
|
||||
Expr *p = pDistinct->a[i].pExpr;
|
||||
if( p->op!=TK_COLUMN ) return 0;
|
||||
pTerm = findTerm(pWC, p->iTable, p->iColumn, ~(Bitmask)0, WO_EQ, 0);
|
||||
if( pTerm ){
|
||||
Expr *pX = pTerm->pExpr;
|
||||
CollSeq *p1 = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight);
|
||||
CollSeq *p2 = sqlite3ExprCollSeq(pParse, p);
|
||||
if( p1==p2 ) continue;
|
||||
}
|
||||
if( p->iTable!=base ) return 0;
|
||||
mask |= (((Bitmask)1) << i);
|
||||
}
|
||||
|
||||
for(i=nEqCol; mask && i<pIdx->nColumn; i++){
|
||||
int iExpr = findIndexCol(pParse, pDistinct, base, pIdx, i);
|
||||
if( iExpr<0 ) break;
|
||||
mask &= ~(((Bitmask)1) << iExpr);
|
||||
}
|
||||
|
||||
return (mask==0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Return true if the DISTINCT expression-list passed as the third argument
|
||||
** is redundant. A DISTINCT list is redundant if the database contains a
|
||||
** UNIQUE index that guarantees that the result of the query will be distinct
|
||||
** anyway.
|
||||
*/
|
||||
static int isDistinctRedundant(
|
||||
Parse *pParse,
|
||||
SrcList *pTabList,
|
||||
WhereClause *pWC,
|
||||
ExprList *pDistinct
|
||||
){
|
||||
Table *pTab;
|
||||
Index *pIdx;
|
||||
int i;
|
||||
int iBase;
|
||||
|
||||
/* If there is more than one table or sub-select in the FROM clause of
|
||||
** this query, then it will not be possible to show that the DISTINCT
|
||||
** clause is redundant. */
|
||||
if( pTabList->nSrc!=1 ) return 0;
|
||||
iBase = pTabList->a[0].iCursor;
|
||||
pTab = pTabList->a[0].pTab;
|
||||
|
||||
/* If any of the expressions is an IPK column, then return true. */
|
||||
for(i=0; i<pDistinct->nExpr; i++){
|
||||
Expr *p = pDistinct->a[i].pExpr;
|
||||
assert( p->op!=TK_COLUMN || p->iTable==iBase );
|
||||
if( p->op==TK_COLUMN && p->iColumn<0 ) return 1;
|
||||
}
|
||||
|
||||
/* Loop through all indices on the table, checking each to see if it makes
|
||||
** the DISTINCT qualifier redundant. It does so if:
|
||||
**
|
||||
** 1. The index is itself UNIQUE, and
|
||||
**
|
||||
** 2. All of the columns in the index are either part of the pDistinct
|
||||
** list, or else the WHERE clause contains a term of the form "col=X",
|
||||
** where X is a constant value. The collation sequences of the
|
||||
** comparison and select-list expressions must match those of the index.
|
||||
*/
|
||||
for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
|
||||
if( pIdx->onError==OE_None ) continue;
|
||||
for(i=0; i<pIdx->nColumn; i++){
|
||||
int iCol = pIdx->aiColumn[i];
|
||||
if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx)
|
||||
&& 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i)
|
||||
){
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( i==pIdx->nColumn ){
|
||||
/* This index implies that the DISTINCT qualifier is redundant. */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** This routine decides if pIdx can be used to satisfy the ORDER BY
|
||||
|
@ -2910,9 +3060,7 @@ static void bestBtreeIndex(
|
|||
/* If there is a DISTINCT qualifier and this index will scan rows in
|
||||
** order of the DISTINCT expressions, clear bDist and set the appropriate
|
||||
** flags in wsFlags. */
|
||||
if( isSortingIndex(
|
||||
pParse, pWC->pMaskSet, pProbe, iCur, pDistinct, nEq, wsFlags, 0)
|
||||
){
|
||||
if( isDistinctIndex(pParse, pWC, pProbe, iCur, pDistinct, nEq) ){
|
||||
bDist = 0;
|
||||
wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT;
|
||||
}
|
||||
|
@ -4288,75 +4436,6 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Return true if the DISTINCT expression-list passed as the third argument
|
||||
** is redundant. A DISTINCT list is redundant if the database contains a
|
||||
** UNIQUE index that guarantees that the result of the query will be distinct
|
||||
** anyway.
|
||||
*/
|
||||
static int whereDistinctRedundant(
|
||||
Parse *pParse,
|
||||
SrcList *pTabList,
|
||||
WhereClause *pWC,
|
||||
ExprList *pDistinct
|
||||
){
|
||||
Table *pTab;
|
||||
Index *pIdx;
|
||||
int i;
|
||||
int iBase;
|
||||
|
||||
/* If there is more than one table or sub-select in the FROM clause of
|
||||
** this query, then it will not be possible to show that the DISTINCT
|
||||
** clause is redundant. */
|
||||
if( pTabList->nSrc!=1 ) return 0;
|
||||
iBase = pTabList->a[0].iCursor;
|
||||
pTab = pTabList->a[0].pTab;
|
||||
|
||||
/* If any of the expressions is an IPK column, then return true. */
|
||||
for(i=0; i<pDistinct->nExpr; i++){
|
||||
Expr *p = pDistinct->a[i].pExpr;
|
||||
assert( p->op!=TK_COLUMN || p->iTable==iBase );
|
||||
if( p->op==TK_COLUMN && p->iColumn<0 ) return 1;
|
||||
}
|
||||
|
||||
/* Loop through all indices on the table, checking each to see if it makes
|
||||
** the DISTINCT qualifier redundant. It does so if:
|
||||
**
|
||||
** 1. The index is itself UNIQUE, and
|
||||
**
|
||||
** 2. All of the columns in the index are either part of the pDistinct
|
||||
** list, or else the WHERE clause contains a term of the form "col=X",
|
||||
** where X is a constant value. The collation sequences of the
|
||||
** comparison and select-list expressions must match those of the index.
|
||||
*/
|
||||
for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
|
||||
if( pIdx->onError==OE_None ) continue;
|
||||
for(i=0; i<pIdx->nColumn; i++){
|
||||
int iCol = pIdx->aiColumn[i];
|
||||
const char *zColl = pIdx->azColl[i];
|
||||
|
||||
if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){
|
||||
int j;
|
||||
for(j=0; j<pDistinct->nExpr; j++){
|
||||
Expr *p = pDistinct->a[j].pExpr;
|
||||
assert( p->op!=TK_COLUMN || p->iTable==iBase );
|
||||
if( p->op==TK_COLUMN && p->iColumn==iCol ){
|
||||
CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
|
||||
if( pColl && 0==sqlite3StrICmp(zColl, pColl->zName) ) break;
|
||||
}
|
||||
}
|
||||
if( j==pDistinct->nExpr ) break;
|
||||
}
|
||||
}
|
||||
if( i==pIdx->nColumn ){
|
||||
/* This index implies that the DISTINCT qualifier is redundant. */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Generate the beginning of the loop used for WHERE clause processing.
|
||||
|
@ -4583,7 +4662,7 @@ WhereInfo *sqlite3WhereBegin(
|
|||
** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to
|
||||
** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT.
|
||||
*/
|
||||
if( pDistinct && whereDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){
|
||||
if( pDistinct && isDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){
|
||||
pDistinct = 0;
|
||||
pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
|
||||
}
|
||||
|
|
|
@ -41,6 +41,23 @@ proc do_distinct_not_noop_test {tn sql} {
|
|||
uplevel [list do_test $tn [list is_distinct_noop $sql] 0]
|
||||
}
|
||||
|
||||
proc do_temptables_test {tn sql temptables} {
|
||||
uplevel [list do_test $tn [subst -novar {
|
||||
set ret ""
|
||||
db eval "EXPLAIN [set sql]" {
|
||||
if {$opcode == "OpenEphemeral"} {
|
||||
if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" }
|
||||
if {$p5 == "10"} {
|
||||
lappend ret hash
|
||||
} else {
|
||||
lappend ret btree
|
||||
}
|
||||
}
|
||||
}
|
||||
set ret
|
||||
}] $temptables]
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# The following tests - distinct-1.* - check that the planner correctly
|
||||
|
@ -107,6 +124,37 @@ foreach {tn noop sql} {
|
|||
}
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# The following tests - distinct-2.* - test cases where an index is
|
||||
# used to deliver results in order of the DISTINCT expressions.
|
||||
#
|
||||
drop_all_tables
|
||||
do_execsql_test 2.0 {
|
||||
CREATE TABLE t1(a, b, c);
|
||||
|
||||
CREATE INDEX i1 ON t1(a, b);
|
||||
CREATE INDEX i2 ON t1(b COLLATE nocase, c COLLATE nocase);
|
||||
|
||||
INSERT INTO t1 VALUES('a', 'b', 'c');
|
||||
INSERT INTO t1 VALUES('A', 'B', 'C');
|
||||
INSERT INTO t1 VALUES('a', 'b', 'c');
|
||||
INSERT INTO t1 VALUES('A', 'B', 'C');
|
||||
}
|
||||
|
||||
foreach {tn sql temptables res} {
|
||||
1 "a, b FROM t1" {} {A B a b}
|
||||
2 "b, a FROM t1" {} {B A b a}
|
||||
3 "a, b, c FROM t1" {hash} {a b c A B C}
|
||||
4 "a, b, c FROM t1 ORDER BY a, b, c" {btree} {A B C a b c}
|
||||
5 "b FROM t1 WHERE a = 'a'" {} {b}
|
||||
6 "b FROM t1" {hash} {b B}
|
||||
7 "a FROM t1" {} {A a}
|
||||
8 "b COLLATE nocase FROM t1" {} {b}
|
||||
9 "b COLLATE nocase FROM t1 ORDER BY b COLLATE nocase" {} {B}
|
||||
} {
|
||||
do_execsql_test 2.$tn.1 "SELECT DISTINCT $sql" $res
|
||||
do_temptables_test 2.$tn.2 "SELECT DISTINCT $sql" $temptables
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
# Commands to manipulate the db and the file-system at a high level:
|
||||
#
|
||||
# copy_file FROM TO
|
||||
# drop_all_table ?DB?
|
||||
# drop_all_tables ?DB?
|
||||
# forcedelete FILENAME
|
||||
#
|
||||
# Test the capability of the SQLite version built into the interpreter to
|
||||
|
|
Loading…
Reference in New Issue