Make use of histogram data to make better estimates for the number of rows

that will be returned from "x IN (v1,v2,v3,...)" constraints.

FossilOrigin-Name: fd3977a27ae68e694df12a4713e55515c1e87c5d
This commit is contained in:
drh 2011-01-21 16:27:18 +00:00
parent 9b3eb0adc1
commit 0c50fa0f61
3 changed files with 97 additions and 26 deletions

View File

@ -1,8 +1,8 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Add\sthe\sability\sto\suse\sindices\swhen\sa\srange\scontraint\sis\sbounded\son\nthe\slower\send\sby\sNULL.
D 2011-01-21T14:37:04.663
C Make\suse\sof\shistogram\sdata\sto\smake\sbetter\sestimates\sfor\sthe\snumber\sof\srows\nthat\swill\sbe\sreturned\sfrom\s"x\sIN\s(v1,v2,v3,...)"\sconstraints.
D 2011-01-21T16:27:18.621
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in de6498556d536ae60bb8bb10e8c1ba011448658c
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -243,7 +243,7 @@ F src/vtab.c b297e8fa656ab5e66244ab15680d68db0adbec30
F src/wal.c dbca424f71678f663a286ab2a98f947af1d412a7
F src/wal.h c1aac6593a0b02b15dc625987e619edeab39292e
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
F src/where.c cf219a4275cf430d0e7df9d2db04e9ba29702f8e
F src/where.c 7f2844afffd9e09373e874a74de81d3502b2a35c
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F test/alias.test 4529fbc152f190268a15f9384a5651bbbabc9d87
F test/all.test 51756962d522e474338e9b2ebb26e7364d4aa125
@ -900,14 +900,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P c7b59afaf0c0bf85dbaf0a122cc8d65fca93680f
R 07d2f363ee58370384294fd182ee30ff
P f73a167b434fadcbbd15e3891c4b7f4f87f6363c
R 46f7d508c889f9891a64638b5f1737ae
U drh
Z 32ed47abc5f3838cb9feba0ba8b99416
Z 14ba122ed035896c7c1b08aa324c4833
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFNOZoToxKgR168RlERAkh6AJ4kec2vRcpVEtGIXoGz4TjpsHbYigCeMtj1
RYqs8Oaohb8CL1KIU7eSI9Q=
=lXJE
iD8DBQFNObPpoxKgR168RlERAmgEAJ97hcV3wI5jmVOjUrAeDzSnM45gLACghPy2
7kt0j2FfeGbbS4tWO9hsJaU=
=BYPr
-----END PGP SIGNATURE-----

View File

@ -1 +1 @@
f73a167b434fadcbbd15e3891c4b7f4f87f6363c
fd3977a27ae68e694df12a4713e55515c1e87c5d

View File

@ -2475,18 +2475,19 @@ range_est_fallback:
** column of an index and sqlite_stat2 histogram data is available
** for that index.
**
** Write the estimated row count into *pnRow. If unable to make
** an estimate, leave *pnRow unchanged.
** Write the estimated row count into *pnRow and return SQLITE_OK.
** If unable to make an estimate, leave *pnRow unchanged and return
** non-zero.
**
** This routine can fail if it is unable to load a collating sequence
** required for string comparison, or if unable to allocate memory
** for a UTF conversion required for comparison. The error is stored
** in the pParse structure.
*/
void whereEqScanEst(
int whereEqualScanEst(
Parse *pParse, /* Parsing & code generating context */
Index *p, /* The index whose left-most column is pTerm */
WhereTerm *pTerm, /* The x=VALUE constraint */
Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */
double *pnRow /* Write the revised row estimate here */
){
sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */
@ -2496,14 +2497,14 @@ void whereEqScanEst(
double nRowEst; /* New estimate of the number of rows */
assert( p->aSample!=0 );
assert( pTerm->eOperator==WO_EQ );
aff = p->pTable->aCol[p->aiColumn[0]].affinity;
rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs);
if( rc ) goto whereEqScanEst_cancel;
rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
if( rc ) goto whereEqualScanEst_cancel;
if( pRhs==0 ) return SQLITE_NOTFOUND;
rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
if( rc ) goto whereEqScanEst_cancel;
if( rc ) goto whereEqualScanEst_cancel;
rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
if( rc ) goto whereEqScanEst_cancel;
if( rc ) goto whereEqualScanEst_cancel;
WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
if( iLower>=iUpper ){
nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
@ -2513,8 +2514,76 @@ void whereEqScanEst(
*pnRow = nRowEst;
}
whereEqScanEst_cancel:
whereEqualScanEst_cancel:
sqlite3ValueFree(pRhs);
return rc;
}
#endif /* defined(SQLITE_ENABLE_STAT2) */
#ifdef SQLITE_ENABLE_STAT2
/*
** Estimate the number of rows that will be returned based on
** an IN constraint "x IN (V1,V2,V3,...)" where the right-hand side
** of the IN operator is a list of values.
**
** Write the estimated row count into *pnRow and return SQLITE_OK.
** If unable to make an estimate, leave *pnRow unchanged and return
** non-zero.
**
** This routine can fail if it is unable to load a collating sequence
** required for string comparison, or if unable to allocate memory
** for a UTF conversion required for comparison. The error is stored
** in the pParse structure.
*/
int whereInScanEst(
Parse *pParse, /* Parsing & code generating context */
Index *p, /* The index whose left-most column is pTerm */
ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */
double *pnRow /* Write the revised row estimate here */
){
sqlite3_value *pVal = 0; /* One value from list */
int iLower, iUpper; /* Range of histogram regions containing pRhs */
u8 aff; /* Column affinity */
int rc; /* Subfunction return code */
double nRowEst; /* New estimate of the number of rows */
int nRegion = 0; /* Number of histogram regions spanned */
int nSingle = 0; /* Count of values contained within one region */
int nNotFound = 0; /* Count of values that are not constants */
int i; /* Loop counter */
u8 aHit[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */
assert( p->aSample!=0 );
aff = p->pTable->aCol[p->aiColumn[0]].affinity;
memset(aHit, 0, sizeof(aHit));
for(i=0; i<pList->nExpr; i++){
sqlite3ValueFree(pVal);
rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
if( rc ) break;
if( pVal==0 ){
nNotFound++;
continue;
}
rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
if( rc ) break;
rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
if( rc ) break;
if( iLower>=iUpper ){
nSingle++;
}
assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
while( iLower<=iUpper ) aHit[iLower++] = 1;
}
if( rc==SQLITE_OK ){
for(i=nRegion=0; i<ArraySize(aHit); i++) nRegion += aHit[i];
nRowEst = nRegion*p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES+1)
+ nNotFound*p->aiRowEst[1];
if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
*pnRow = nRowEst;
WHERETRACE(("IN row estimate: nRegion=%d, nSingle=%d, nNotFound=%d\n",
nRegion, nSingle, nNotFound));
}
sqlite3ValueFree(pVal);
return rc;
}
#endif /* defined(SQLITE_ENABLE_STAT2) */
@ -2686,7 +2755,7 @@ static void bestBtreeIndex(
int bLookup = 0;
WhereTerm *pTerm; /* A single term of the WHERE clause */
#ifdef SQLITE_ENABLE_STAT2
WhereTerm *pFirstEqTerm = 0; /* First WO_EQ term */
WhereTerm *pFirstTerm = 0; /* First term matching the index */
#endif
/* Determine the values of nEq and nInMul */
@ -2710,9 +2779,7 @@ static void bestBtreeIndex(
wsFlags |= WHERE_COLUMN_NULL;
}
#ifdef SQLITE_ENABLE_STAT2
else if( nEq==0 && pProbe->aSample ){
pFirstEqTerm = pTerm;
}
if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm;
#endif
used |= pTerm->prereqRight;
}
@ -2796,8 +2863,12 @@ static void bestBtreeIndex(
** to get a better estimate on the number of rows based on
** VALUE and how common that value is according to the histogram.
*/
if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){
whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow);
if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){
if( pFirstTerm->eOperator==WO_EQ ){
whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
}else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
}
}
#endif /* SQLITE_ENABLE_STAT2 */