When estimating the number of rows visited by a range scan for which the keys consist of more than one field, consider prefixes of stat4 samples as well as the full samples.
FossilOrigin-Name: e1caf93c9ad0ee15d42030af95619f212d3fcf9d
This commit is contained in:
parent
dfac7016a7
commit
a3d0c13654
17
manifest
17
manifest
@ -1,5 +1,5 @@
|
||||
C Add\stests\sto\sensure\s"PRAGMA\sincremental_vacuum"\sand\s"PRAGMA\sauto_vacuum\s=\sincremental"\shandle\scorrupt\sdatabases\scorrectly.
|
||||
D 2015-03-13T15:44:36.085
|
||||
C When\sestimating\sthe\snumber\sof\srows\svisited\sby\sa\srange\sscan\sfor\swhich\sthe\skeys\sconsist\sof\smore\sthan\sone\sfield,\sconsider\sprefixes\sof\sstat4\ssamples\sas\swell\sas\sthe\sfull\ssamples.
|
||||
D 2015-03-14T18:59:58.801
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in 88a3e6261286db378fdffa1124cad11b3c05f5bb
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
@ -307,7 +307,7 @@ F src/vxworks.h c18586c8edc1bddbc15c004fa16aeb1e1342b4fb
|
||||
F src/wal.c 39303f2c9db02a4e422cd8eb2c8760420c6a51fe
|
||||
F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4
|
||||
F src/walker.c c253b95b4ee44b21c406e2a1052636c31ea27804
|
||||
F src/where.c eb141b075776e9864d38f279333e2472a8653202
|
||||
F src/where.c 5a4e4ab378dbddeca59ad283c61aa67c6e56a913
|
||||
F src/whereInt.h cbe4aa57326998d89e7698ca65bb7c28541d483c
|
||||
F test/8_3_names.test ebbb5cd36741350040fd28b432ceadf495be25b2
|
||||
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
|
||||
@ -327,7 +327,7 @@ F test/analyze5.test 765c4e284aa69ca172772aa940946f55629bc8c4
|
||||
F test/analyze6.test f1c552ce39cca4ec922a7e4e0e5d0203d6b3281f
|
||||
F test/analyze7.test bb1409afc9e8629e414387ef048b8e0e3e0bdc4f
|
||||
F test/analyze8.test c05a461d0a6b05991106467d0c47480f2e709c82
|
||||
F test/analyze9.test 72795c8113604b5dcd47a1498a61d6d7fb5d041a
|
||||
F test/analyze9.test 2f6cfeae1fcc61cc531bd19f68e1e28fb6edafbf
|
||||
F test/analyzeA.test 3335697f6700c7052295cfd0067fc5b2aacddf9a
|
||||
F test/analyzeB.test 8bf35ee0a548aea831bf56762cb8e7fdb1db083d
|
||||
F test/analyzeC.test 555a6cc388b9818b6eda6df816f01ce0a75d3a93
|
||||
@ -1244,7 +1244,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
|
||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
|
||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||
P 5aa522dcb9bfa18d49683f7cc889516984e2bcd2
|
||||
R a79445283a8ae7c7f85ae571d23bc239
|
||||
P 1c2166cb2a387a0856f41b399c3648bf8c5fce73
|
||||
R f0473fc546184f7826d3d60610130f49
|
||||
T *branch * stat4-change
|
||||
T *sym-stat4-change *
|
||||
T -sym-trunk *
|
||||
U dan
|
||||
Z d6c136b9ed1be16533c60706d0427e82
|
||||
Z e2a2579617ea31b6dadba1e9a71ba744
|
||||
|
@ -1 +1 @@
|
||||
1c2166cb2a387a0856f41b399c3648bf8c5fce73
|
||||
e1caf93c9ad0ee15d42030af95619f212d3fcf9d
|
174
src/where.c
174
src/where.c
@ -1931,11 +1931,14 @@ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){
|
||||
** Estimate the location of a particular key among all keys in an
|
||||
** index. Store the results in aStat as follows:
|
||||
**
|
||||
** aStat[0] Est. number of rows less than pVal
|
||||
** aStat[1] Est. number of rows equal to pVal
|
||||
** aStat[0] Est. number of rows less than pRec
|
||||
** aStat[1] Est. number of rows equal to pRec
|
||||
**
|
||||
** Return the index of the sample that is the smallest sample that
|
||||
** is greater than or equal to pRec.
|
||||
** is greater than or equal to pRec. Note that this index is not an index
|
||||
** into the aSample[] array - it is an index into a virtual set of samples
|
||||
** based on the contents of aSample[] and the number of fields in record
|
||||
** pRec.
|
||||
*/
|
||||
static int whereKeyStats(
|
||||
Parse *pParse, /* Database connection */
|
||||
@ -1946,67 +1949,158 @@ static int whereKeyStats(
|
||||
){
|
||||
IndexSample *aSample = pIdx->aSample;
|
||||
int iCol; /* Index of required stats in anEq[] etc. */
|
||||
int i; /* Index of first sample >= pRec */
|
||||
int iSample; /* Smallest sample larger than or equal to pRec */
|
||||
int iMin = 0; /* Smallest sample not yet tested */
|
||||
int i = pIdx->nSample; /* Smallest sample larger than or equal to pRec */
|
||||
int iTest; /* Next sample to test */
|
||||
int res; /* Result of comparison operation */
|
||||
int nField; /* Number of fields in pRec */
|
||||
tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */
|
||||
|
||||
#ifndef SQLITE_DEBUG
|
||||
UNUSED_PARAMETER( pParse );
|
||||
#endif
|
||||
assert( pRec!=0 );
|
||||
iCol = pRec->nField - 1;
|
||||
assert( pIdx->nSample>0 );
|
||||
assert( pRec->nField>0 && iCol<pIdx->nSampleCol );
|
||||
assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol );
|
||||
|
||||
/* Do a binary search to find the first sample greater than or equal
|
||||
** to pRec. If pRec contains a single field, the set of samples to search
|
||||
** is simply the aSample[] array. If the samples in aSample[] contain more
|
||||
** than one field, all fields following the first are ignored.
|
||||
**
|
||||
** If pRec contains N fields, where N is more than one, then as well as the
|
||||
** samples in aSample[] (truncated to N fields), the search also has to
|
||||
** consider prefixes of those samples. For example, if the set of samples
|
||||
** in aSample is:
|
||||
**
|
||||
** aSample[0] = (a, 5)
|
||||
** aSample[1] = (a, 10)
|
||||
** aSample[2] = (b, 5)
|
||||
** aSample[3] = (c, 100)
|
||||
** aSample[4] = (c, 105)
|
||||
**
|
||||
** Then the search space should ideally be the samples above and the
|
||||
** unique prefixes [a], [b] and [c]. But since that is hard to organize,
|
||||
** the code actually searches this set:
|
||||
**
|
||||
** 0: (a)
|
||||
** 1: (a, 5)
|
||||
** 2: (a, 10)
|
||||
** 3: (a, 10)
|
||||
** 4: (b)
|
||||
** 5: (b, 5)
|
||||
** 6: (c)
|
||||
** 7: (c, 100)
|
||||
** 8: (c, 105)
|
||||
** 9: (c, 105)
|
||||
**
|
||||
** For each sample in the aSample[] array, N samples are present in the
|
||||
** effective sample array. In the above, samples 0 and 1 are based on
|
||||
** sample aSample[0]. Samples 2 and 3 on aSample[1] etc.
|
||||
**
|
||||
** Often, sample i of each block of N effective samples has (i+1) fields.
|
||||
** Except, each sample may be extended to ensure that it is greater than or
|
||||
** equal to the previous sample in the array. For example, in the above,
|
||||
** sample 2 is the first sample of a block of N samples, so at first it
|
||||
** appears that it should be 1 field in size. However, that would make it
|
||||
** smaller than sample 1, so the binary search would not work. As a result,
|
||||
** it is extended to two fields. The duplicates that this creates do not
|
||||
** cause any problems.
|
||||
*/
|
||||
nField = pRec->nField;
|
||||
iCol = 0;
|
||||
iSample = pIdx->nSample * nField;
|
||||
do{
|
||||
iTest = (iMin+i)/2;
|
||||
res = sqlite3VdbeRecordCompare(aSample[iTest].n, aSample[iTest].p, pRec);
|
||||
if( res<0 ){
|
||||
iMin = iTest+1;
|
||||
int iSamp; /* Index in aSample[] of test sample */
|
||||
int n; /* Number of fields in test sample */
|
||||
|
||||
iTest = (iMin+iSample)/2;
|
||||
iSamp = iTest / nField;
|
||||
if( iSamp>0 ){
|
||||
/* The proposed effective sample is a prefix of sample aSample[iSamp].
|
||||
** Specifically, the shortest prefix of at least (1 + iTest%nField)
|
||||
** fields that is greater than the previous effective sample. */
|
||||
for(n=(iTest % nField) + 1; n<nField; n++){
|
||||
if( aSample[iSamp-1].anLt[n-1]!=aSample[iSamp].anLt[n-1] ) break;
|
||||
}
|
||||
}else{
|
||||
i = iTest;
|
||||
n = iTest + 1;
|
||||
}
|
||||
}while( res && iMin<i );
|
||||
|
||||
pRec->nField = n;
|
||||
res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec);
|
||||
if( res<0 ){
|
||||
iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1];
|
||||
iMin = iTest+1;
|
||||
}else if( res==0 && n<nField ){
|
||||
iLower = aSample[iSamp].anLt[n-1];
|
||||
iMin = iTest+1;
|
||||
res = -1;
|
||||
}else{
|
||||
iSample = iTest;
|
||||
iCol = n-1;
|
||||
}
|
||||
}while( res && iMin<iSample );
|
||||
i = iSample / nField;
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
/* The following assert statements check that the binary search code
|
||||
** above found the right answer. This block serves no purpose other
|
||||
** than to invoke the asserts. */
|
||||
if( res==0 ){
|
||||
/* If (res==0) is true, then sample $i must be equal to pRec */
|
||||
assert( i<pIdx->nSample );
|
||||
assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)
|
||||
|| pParse->db->mallocFailed );
|
||||
}else{
|
||||
/* Otherwise, pRec must be smaller than sample $i and larger than
|
||||
** sample ($i-1). */
|
||||
assert( i==pIdx->nSample
|
||||
|| sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0
|
||||
|| pParse->db->mallocFailed );
|
||||
assert( i==0
|
||||
|| sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0
|
||||
|| pParse->db->mallocFailed );
|
||||
if( pParse->db->mallocFailed==0 ){
|
||||
if( res==0 ){
|
||||
/* If (res==0) is true, then pRec must be equal to sample i. */
|
||||
assert( i<pIdx->nSample );
|
||||
assert( iCol==nField-1 );
|
||||
pRec->nField = nField;
|
||||
assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)
|
||||
|| pParse->db->mallocFailed
|
||||
);
|
||||
}else{
|
||||
/* Unless i==pIdx->nSample, indicating that pRec is larger than
|
||||
** all samples in the aSample[] array, pRec must be smaller than the
|
||||
** (iCol+1) field prefix of sample i. */
|
||||
assert( i<=pIdx->nSample && i>=0 );
|
||||
pRec->nField = iCol+1;
|
||||
assert( i==pIdx->nSample
|
||||
|| sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0
|
||||
|| pParse->db->mallocFailed );
|
||||
|
||||
/* If i==0 and iCol==0, then record pRec is smaller than all samples
|
||||
** in the aSample[] array. Otherwise, if (iCol>0) then pRec must
|
||||
** be greater than or equal to the (iCol) field prefix of sample i.
|
||||
** If (i>0), then pRec must also be greater than sample (i-1). */
|
||||
if( iCol>0 ){
|
||||
pRec->nField = iCol;
|
||||
assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0
|
||||
|| pParse->db->mallocFailed );
|
||||
}
|
||||
if( i>0 ){
|
||||
pRec->nField = nField;
|
||||
assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0
|
||||
|| pParse->db->mallocFailed );
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* ifdef SQLITE_DEBUG */
|
||||
|
||||
/* At this point, aSample[i] is the first sample that is greater than
|
||||
** or equal to pVal. Or if i==pIdx->nSample, then all samples are less
|
||||
** than pVal. If aSample[i]==pVal, then res==0.
|
||||
*/
|
||||
if( res==0 ){
|
||||
/* Record pRec is equal to sample i */
|
||||
assert( iCol==nField-1 );
|
||||
aStat[0] = aSample[i].anLt[iCol];
|
||||
aStat[1] = aSample[i].anEq[iCol];
|
||||
}else{
|
||||
tRowcnt iLower, iUpper, iGap;
|
||||
if( i==0 ){
|
||||
iLower = 0;
|
||||
iUpper = aSample[0].anLt[iCol];
|
||||
/* At this point, the (iCol+1) field prefix of aSample[i] is the first
|
||||
** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec
|
||||
** is larger than all samples in the array. */
|
||||
tRowcnt iUpper, iGap;
|
||||
if( i>=pIdx->nSample ){
|
||||
iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
|
||||
}else{
|
||||
i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
|
||||
iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol];
|
||||
iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol];
|
||||
iUpper = aSample[i].anLt[iCol];
|
||||
}
|
||||
aStat[1] = pIdx->aAvgEq[iCol];
|
||||
|
||||
if( iLower>=iUpper ){
|
||||
iGap = 0;
|
||||
}else{
|
||||
@ -2018,7 +2112,11 @@ static int whereKeyStats(
|
||||
iGap = iGap/3;
|
||||
}
|
||||
aStat[0] = iLower + iGap;
|
||||
aStat[1] = pIdx->aAvgEq[iCol];
|
||||
}
|
||||
|
||||
/* Restore the pRec->nField value before returning. */
|
||||
pRec->nField = nField;
|
||||
return i;
|
||||
}
|
||||
#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
|
||||
|
@ -1134,4 +1134,67 @@ ifcapable stat4&&cte {
|
||||
}
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Check that a problem in the way stat4 data is used has been
|
||||
# resolved (see below).
|
||||
#
|
||||
reset_db
|
||||
do_test 26.1 {
|
||||
db transaction {
|
||||
execsql {
|
||||
CREATE TABLE t1(x, y, z);
|
||||
CREATE INDEX t1xy ON t1(x, y);
|
||||
CREATE INDEX t1z ON t1(z);
|
||||
}
|
||||
for {set i 0} {$i < 10000} {incr i} {
|
||||
execsql { INSERT INTO t1(x, y) VALUES($i, $i) }
|
||||
}
|
||||
for {set i 0} {$i < 10} {incr i} {
|
||||
execsql {
|
||||
WITH cnt(x) AS (SELECT 1 UNION ALL SELECT x+1 FROM cnt WHERE x<100)
|
||||
INSERT INTO t1(x, y) SELECT 10000+$i, x FROM cnt;
|
||||
INSERT INTO t1(x, y) SELECT 10000+$i, 100;
|
||||
}
|
||||
}
|
||||
execsql {
|
||||
UPDATE t1 SET z = rowid / 20;
|
||||
ANALYZE;
|
||||
}
|
||||
}
|
||||
} {}
|
||||
|
||||
do_execsql_test 26.2 {
|
||||
SELECT count(*) FROM t1 WHERE x = 10000 AND y < 50;
|
||||
} {49}
|
||||
do_execsql_test 26.3 {
|
||||
SELECT count(*) FROM t1 WHERE z = 444;
|
||||
} {20}
|
||||
|
||||
# The analyzer knows that any (z=?) expression matches 20 rows. So it
|
||||
# will use index "t1z" if the estimate of hits for (x=10000 AND y<50)
|
||||
# is greater than 20 rows.
|
||||
#
|
||||
# And it should be. The analyzer has a stat4 sample as follows:
|
||||
#
|
||||
# sample=(x=10000, y=100) nLt=(10000 10099)
|
||||
#
|
||||
# There should be no other samples that start with (x=10000). So it knows
|
||||
# that (x=10000 AND y<50) must match somewhere between 0 and 99 rows, but
|
||||
# no more than that. Guessing less than 20 is therefore unreasonable.
|
||||
#
|
||||
# At one point though, due to a problem in whereKeyStats(), the planner was
|
||||
# estimating that (x=10000 AND y<50) would match only 2 rows.
|
||||
#
|
||||
do_eqp_test 26.4 {
|
||||
SELECT * FROM t1 WHERE x = 10000 AND y < 50 AND z = 444;
|
||||
} {
|
||||
0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?)}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
finish_test
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user