If STAT4 determines that a WHERE clause term that is not used by an index

has very high probability of being true, then do not use that term to reduce
the estimated output row count.

FossilOrigin-Name: 40739c793b0e98a3bae296d3a1f74944edcdd4cc33c26b417fde4eaf6f14d062
This commit is contained in:
drh 2020-02-24 17:05:09 +00:00
commit f575c1cf06
5 changed files with 183 additions and 27 deletions

View File

@ -1,5 +1,5 @@
C Fix\sa\sproblem\swith\sALTER\sTABLE\sfor\sviews\sthat\shave\sa\snested\sFROM\sclause.\nTicket\s[f50af3e8a565776b].
D 2020-02-23T17:34:45.938
C If\sSTAT4\sdetermines\sthat\sa\sWHERE\sclause\sterm\sthat\sis\snot\sused\sby\san\sindex\nhas\svery\shigh\sprobability\sof\sbeing\strue,\sthen\sdo\snot\suse\sthat\sterm\sto\sreduce\nthe\sestimated\soutput\srow\scount.
D 2020-02-24T17:05:09.915
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -617,8 +617,8 @@ F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
F src/wal.c 697424314e40d99f93f548c7bfa526c10e87f4bdf64d5a76a96b999dd7133ebc
F src/wal.h 606292549f5a7be50b6227bd685fa76e3a4affad71bb8ac5ce4cb5c79f6a176a
F src/walker.c a137468bf36c92e64d2275caa80c83902e3a0fc59273591b96c6416d3253d05d
F src/where.c cbad14f1d8e11b9f052e937274315c7c17266a89eda408c86084ee894debb7d5
F src/whereInt.h 9157228db086f436a574589f8cc5749bd971e94017c552305ad9ec472ed2e098
F src/where.c 3b8c9bd013eb0736e16f60bdc109e83337ef99513a3aff5f16ddac036e6c277e
F src/whereInt.h 6b874aa15f94e43a2cec1080be64d955b04deeafeac90ffb5d6975c0d511be3c
F src/wherecode.c f5df56e395ade2240cabb2d39500c681bd29f8cc0636c3301c4996ad160df94d
F src/whereexpr.c 264d58971eaf8256eb5b0917bcd7fc7a1f1109fdda183a8382308a1b18a2dce7
F src/window.c f8ba2ee12a19b51d3ba42c16277c74185ee9215306bc0d5a03974ade8b5bc98f
@ -655,6 +655,7 @@ F test/analyzeC.test 489fe2ea3be3f17548e8dd895f1b41c9669b52de1b0861f5bffe6eec46e
F test/analyzeD.test e50cd0b3e6063216cc0c88a1776e8645dc0bd65a6bb275769cbee33b7fd8d90c
F test/analyzeE.test 8684e8ac5722fb97c251887ad97e5d496a98af1d
F test/analyzeF.test 9e1a0537949eb5483642b1140a5c39e5b4025939024b935398471fa552f4dabb
F test/analyzeG.test a48c0f324dd14de9a40d52abe5ca2637f682b9a791d2523dd619f6efa14e345b
F test/analyzer1.test 459fa02c445ddbf0101a3bad47b34290a35f2e49
F test/async.test 1d0e056ba1bb9729283a0f22718d3a25e82c277b
F test/async2.test c0a9bd20816d7d6a2ceca7b8c03d3d69c28ffb8b
@ -1858,7 +1859,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P f02030b3403d67734bba471a91ad5bfdb03ddf6fdc3ef14808a04495e43b0470
R 27f30806e168cf65f551539c0795adeb
P c431b3fd8fd0f6a6974bba3e9366b0430ec003d570e7ce70ceefbcff5fe4b6fa 4558163b6a525990f0f1b6629dbb76daf49bcaf1ddbaf0c50fe05ce9ee480ff8
R 2f59a601b560c09f0f628a3935395fd4
T +closed 4558163b6a525990f0f1b6629dbb76daf49bcaf1ddbaf0c50fe05ce9ee480ff8
U drh
Z b1d396f928d83f91cde5e969c202f590
Z 88585aca481f88c3f165f1a6e9b07ffb

View File

@ -1 +1 @@
c431b3fd8fd0f6a6974bba3e9366b0430ec003d570e7ce70ceefbcff5fe4b6fa
40739c793b0e98a3bae296d3a1f74944edcdd4cc33c26b417fde4eaf6f14d062

View File

@ -2298,7 +2298,9 @@ static void whereLoopOutputAdjust(
/* In the absence of explicit truth probabilities, use heuristics to
** guess a reasonable truth probability. */
pLoop->nOut--;
if( pTerm->eOperator&(WO_EQ|WO_IS) ){
if( (pTerm->eOperator&(WO_EQ|WO_IS))!=0
&& (pTerm->wtFlags & TERM_HIGHTRUTH)==0 /* tag-20200224-1 */
){
Expr *pRight = pTerm->pExpr->pRight;
int k = 0;
testcase( pTerm->pExpr->op==TK_IS );
@ -2307,7 +2309,10 @@ static void whereLoopOutputAdjust(
}else{
k = 20;
}
if( iReduce<k ) iReduce = k;
if( iReduce<k ){
pTerm->wtFlags |= TERM_HEURTRUTH;
iReduce = k;
}
}
}
}
@ -2489,9 +2494,9 @@ static int whereLoopAddBtreeIndex(
}
if( IsUniqueIndex(pProbe) && saved_nEq==pProbe->nKeyCol-1 ){
pBuilder->bldFlags |= SQLITE_BLDF_UNIQUE;
pBuilder->bldFlags1 |= SQLITE_BLDF1_UNIQUE;
}else{
pBuilder->bldFlags |= SQLITE_BLDF_INDEXED;
pBuilder->bldFlags1 |= SQLITE_BLDF1_INDEXED;
}
pNew->wsFlags = saved_wsFlags;
pNew->u.btree.nEq = saved_nEq;
@ -2656,6 +2661,27 @@ static int whereLoopAddBtreeIndex(
if( rc!=SQLITE_OK ) break; /* Jump out of the pTerm loop */
if( nOut ){
pNew->nOut = sqlite3LogEst(nOut);
if( nEq==1
/* TUNING: Mark terms as "low selectivity" if they seem likely
** to be true for half or more of the rows in the table.
** See tag-20200224-1 */
&& pNew->nOut+10 > pProbe->aiRowLogEst[0]
){
#if WHERETRACE_ENABLED /* 0x01 */
if( sqlite3WhereTrace & 0x01 ){
sqlite3DebugPrintf(
"STAT4 determines term has low selectivity:\n");
sqlite3WhereTermPrint(pTerm, 999);
}
#endif
pTerm->wtFlags |= TERM_HIGHTRUTH;
if( pTerm->wtFlags & TERM_HEURTRUTH ){
/* If the term has previously been used with an assumption of
** higher selectivity, then set the flag to rerun the
** loop computations. */
pBuilder->bldFlags2 |= SQLITE_BLDF2_2NDPASS;
}
}
if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut;
pNew->nOut -= nIn;
}
@ -3080,9 +3106,9 @@ static int whereLoopAddBtree(
}
}
pBuilder->bldFlags = 0;
pBuilder->bldFlags1 = 0;
rc = whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, 0);
if( pBuilder->bldFlags==SQLITE_BLDF_INDEXED ){
if( pBuilder->bldFlags1==SQLITE_BLDF1_INDEXED ){
/* If a non-unique index is used, or if a prefix of the key for
** unique index is used (making the index functionally non-unique)
** then the sqlite_stat1 data becomes important for scoring the
@ -4537,6 +4563,28 @@ static int exprIsDeterministic(Expr *p){
return w.eCode;
}
#ifdef WHERETRACE_ENABLED
/*
** Print every WhereLoop on the pWInfo->pLoops list by calling
** sqlite3WhereLoopPrint() on each one.  Before a loop is printed it is
** given a single-character identifier (WhereLoop.cId) taken cyclically
** from a fixed label alphabet.
**
** This routine is a no-op when sqlite3WhereTrace is zero.
*/
static void showAllWhereLoops(WhereInfo *pWInfo, WhereClause *pWC){
  static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz"
                               "ABCDEFGHIJKLMNOPQRSTUVWYXZ";
  WhereLoop *pLoop;
  int iLoop = 0;

  if( !sqlite3WhereTrace ) return;   /* Tracing disabled: nothing to show */
  pLoop = pWInfo->pLoops;
  while( pLoop ){
    /* sizeof(zLabel)-1 excludes the terminating NUL from the cycle */
    pLoop->cId = zLabel[iLoop%(sizeof(zLabel)-1)];
    sqlite3WhereLoopPrint(pLoop, pWC);
    pLoop = pLoop->pNextLoop;
    iLoop++;
  }
}
# define WHERETRACE_ALL_LOOPS(W,C) showAllWhereLoops(W,C)
#else
# define WHERETRACE_ALL_LOOPS(W,C)
#endif
/*
** Generate the beginning of the loop used for WHERE clause processing.
** The return value is a pointer to an opaque structure that contains
@ -4838,19 +4886,28 @@ WhereInfo *sqlite3WhereBegin(
if( nTabList!=1 || whereShortCut(&sWLB)==0 ){
rc = whereLoopAddAll(&sWLB);
if( rc ) goto whereBeginError;
#ifdef WHERETRACE_ENABLED
if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */
WhereLoop *p;
int i;
static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz"
"ABCDEFGHIJKLMNOPQRSTUVWYXZ";
for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){
p->cId = zLabel[i%(sizeof(zLabel)-1)];
sqlite3WhereLoopPrint(p, sWLB.pWC);
#ifdef SQLITE_ENABLE_STAT4
/* If one or more WhereTerm.truthProb values were used in estimating
** loop parameters, but then those truthProb values were subsequently
** changed based on STAT4 information while computing subsequent loops,
** then we need to rerun the whole loop building process so that all
** loops will be built using the revised truthProb values. */
if( sWLB.bldFlags2 & SQLITE_BLDF2_2NDPASS ){
WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC);
WHERETRACE(0xffff,
("**** Redo all loop computations due to"
" TERM_HIGHTRUTH changes ****\n"));
while( pWInfo->pLoops ){
WhereLoop *p = pWInfo->pLoops;
pWInfo->pLoops = p->pNextLoop;
whereLoopDelete(db, p);
}
rc = whereLoopAddAll(&sWLB);
if( rc ) goto whereBeginError;
}
#endif
WHERETRACE_ALL_LOOPS(pWInfo, sWLB.pWC);
wherePathSolver(pWInfo, 0);
if( db->mallocFailed ) goto whereBeginError;

View File

@ -291,6 +291,12 @@ struct WhereTerm {
#define TERM_LIKE 0x0400 /* The original LIKE operator */
#define TERM_IS 0x0800 /* Term.pExpr is an IS operator */
#define TERM_VARSELECT 0x1000 /* Term.pExpr contains a correlated sub-query */
#define TERM_HEURTRUTH 0x2000 /* Heuristic truthProb used */
#ifdef SQLITE_ENABLE_STAT4
# define TERM_HIGHTRUTH 0x4000 /* Term excludes few rows */
#else
# define TERM_HIGHTRUTH 0 /* Only used with STAT4 */
#endif
/*
** An instance of the WhereScan object is used as an iterator for locating
@ -405,13 +411,16 @@ struct WhereLoopBuilder {
UnpackedRecord *pRec; /* Probe for stat4 (if required) */
int nRecValid; /* Number of valid fields currently in pRec */
#endif
unsigned int bldFlags; /* SQLITE_BLDF_* flags */
unsigned char bldFlags1; /* First set of SQLITE_BLDF_* flags */
unsigned char bldFlags2; /* Second set of SQLITE_BLDF_* flags */
unsigned int iPlanLimit; /* Search limiter */
};
/* Allowed values for WhereLoopBuilder.bldFlags */
#define SQLITE_BLDF_INDEXED 0x0001 /* An index is used */
#define SQLITE_BLDF_UNIQUE 0x0002 /* All keys of a UNIQUE index used */
#define SQLITE_BLDF1_INDEXED 0x0001 /* An index is used */
#define SQLITE_BLDF1_UNIQUE 0x0002 /* All keys of a UNIQUE index used */
#define SQLITE_BLDF2_2NDPASS 0x0004 /* Second builder pass needed */
/* The WhereLoopBuilder.iPlanLimit is used to limit the number of
** index+constraint combinations the query planner will consider for a

88
test/analyzeG.test Normal file
View File

@ -0,0 +1,88 @@
# 2020-02-23
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
# Tests for functionality related to ANALYZE.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
ifcapable !stat4 {
finish_test
return
}
set testprefix analyzeG
# Usage: do_scan_order_test TESTNAME SQL EXPECT
#
# Run "explain query plan $sql" against the [db] handle, append the value
# of the "detail" variable for each result row to a list, and use do_test
# to compare that list against EXPECT (normalized via [list {*}$expect]).
#
# The test body is built with [subst -nocommands] so that $sql is expanded
# while the script is being constructed, whereas bracketed commands are
# left for do_test to evaluate later.  That is why the row value is read
# with [set detail] rather than $detail.
#
proc do_scan_order_test {tn sql expect} {
uplevel [list do_test $tn [subst -nocommands {
set res ""
db eval "explain query plan $sql" {
lappend res [set detail]
}
set res
}] [list {*}$expect]]
}
#-------------------------------------------------------------------------
# Test cases 1.* seek to verify that even if an index is not used, its
# stat4 data may be used by the planner to estimate the number of
# rows that match an unindexed constraint on the same column.
#
do_execsql_test 1.0 {
PRAGMA automatic_index = 0;
CREATE TABLE t1(a, x);
CREATE TABLE t2(b, y);
WITH s(i) AS (
SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100
)
INSERT INTO t1 SELECT (i%50), NULL FROM s;
WITH s(i) AS (
SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100
)
INSERT INTO t2 SELECT (CASE WHEN i<95 THEN 44 ELSE i END), NULL FROM s;
}
# Join tables t1 and t2. Both contain 100 rows. (a=44) matches 2 rows
# in "t1", (b=44) matches 94 rows in table "t2". But the planner doesn't
# know this, so it has no preference as to which order the tables are
# scanned in. In practice this means that tables are scanned in the order
# they are specified in the FROM clause.
do_scan_order_test 1.1.1 {
SELECT * FROM t1, t2 WHERE a=44 AND b=44;
} {
{SCAN TABLE t1} {SCAN TABLE t2}
}
do_scan_order_test 1.1.2 {
SELECT * FROM t2, t1 WHERE a=44 AND b=44
} {
{SCAN TABLE t2} {SCAN TABLE t1}
}
do_execsql_test 1.2 {
CREATE INDEX t2b ON t2(b);
ANALYZE;
}
# Now, with the ANALYZE data, the planner knows that (b=44) matches a
# large number of rows. So it elects to scan table "t1" first, regardless
# of the order in which the tables are specified in the FROM clause.
do_scan_order_test 1.3.1 {
SELECT * FROM t1, t2 WHERE a=44 AND b=44;
} {
{SCAN TABLE t1} {SCAN TABLE t2}
}
do_scan_order_test 1.3.2 {
SELECT * FROM t2, t1 WHERE a=44 AND b=44
} {
{SCAN TABLE t1} {SCAN TABLE t2}
}
finish_test