sqlite/test/analyze8.test

# 2011 August 13
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements tests for the SQLite library.  The focus of the
# tests in this file is testing the capabilities of sqlite_stat4.
#

# Load the shared test harness from the directory this script lives in.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Every test below is about sqlite_stat4, so when the "stat4" capability
# is not present, end the test run right away and skip the rest of the file.
ifcapable !stat4 {
  finish_test
  return
}

# NOTE(review): presumably consumed by tester.tcl to prefix the numeric
# test names used below (1.0, 1.1, ...) -- confirm against the harness.
set testprefix analyze8

# Run "EXPLAIN QUERY PLAN <sql>" through execsql and return its result.
#
#   sql - text of the statement to explain; "EXPLAIN QUERY PLAN " is
#         prepended to it verbatim.
#   db  - database handle argument forwarded to execsql (defaults to "db").
#
# execsql is invoked in the caller's stack frame (uplevel), and the
# statement is list-quoted so that it reaches execsql as a single argument.
proc eqp {sql {db db}} {
  set stmt "EXPLAIN QUERY PLAN $sql"
  uplevel execsql [list $stmt] $db
}

# Scenario:
#
#    Two indices.  One has mostly singleton entries, but for a few
#    values there are hundreds of entries.  The other has 10-20
#    entries per value.
#
# Verify that the query planner chooses the first index for the singleton
# entries and the second index for the others.
#
# Build the fixture used by every test in this file: table t1 with one
# single-column index on each of a, b and c, populated with 1000 rows
# (i = 0..999) and then analyzed.
#
#   a: i when i is even, otherwise (i%8)*100, i.e. one of 100, 300, 500
#      or 700.  Each of those four values ends up on 126 rows (125 odd
#      rows plus the even row where i equals the value); every other value
#      of a occurs at most once.
#   b: i/10 (integer division), so each value 0..99 occurs exactly 10 times.
#   c: the cube of i/8, so each cube occurs 8 times and the distinct values
#      get sparser as they grow.
#   d: i itself.
#
# The expected result is empty because the final ANALYZE returns no rows.
do_test 1.0 {
  db eval {
    CREATE TABLE t1(a,b,c,d);
    CREATE INDEX t1a ON t1(a);
    CREATE INDEX t1b ON t1(b);
    CREATE INDEX t1c ON t1(c);
  }
  # Insert all rows inside one explicit transaction rather than paying for
  # 1000 separate autocommit transactions.  The resulting table and index
  # contents are the same either way.
  db eval {BEGIN}
  for {set i 0} {$i<1000} {incr i} {
    if {$i%2==0} {set a $i} {set a [expr {($i%8)*100}]}
    set b [expr {$i/10}]
    set c [expr {$i/8}]
    set c [expr {$c*$c*$c}]
    db eval {INSERT INTO t1 VALUES($a,$b,$c,$i)}
  }
  db eval {COMMIT}
  db eval {ANALYZE}
} {}

# The a==100 comparison is expensive because there are many rows
# with a==100.  And so for those cases, choose the t1b index.
#
# But for a==99 and a==101, there are far fewer rows so choose
# the t1a index.
#
# Table-driven plan checks.  Each entry is:
#
#     test-id   SQL statement   expected-result pattern for do_test
#
# Tests 1.1-1.6 combine an equality on a (100, 99 or 101) with an equality
# on b (55 or 56); test 2.1 combines a=100 with a range constraint on b.
# Every statement is run through eqp and the resulting query plan is
# compared against the pattern.
foreach {tn sql plan} {
  1.1 {SELECT * FROM t1 WHERE a=100 AND b=55}
      {/*SEARCH TABLE t1 USING INDEX t1b (b=?)*/}
  1.2 {SELECT * FROM t1 WHERE a=99 AND b=55}
      {/*SEARCH TABLE t1 USING INDEX t1a (a=?)*/}
  1.3 {SELECT * FROM t1 WHERE a=101 AND b=55}
      {/*SEARCH TABLE t1 USING INDEX t1a (a=?)*/}
  1.4 {SELECT * FROM t1 WHERE a=100 AND b=56}
      {/*SEARCH TABLE t1 USING INDEX t1b (b=?)*/}
  1.5 {SELECT * FROM t1 WHERE a=99 AND b=56}
      {/*SEARCH TABLE t1 USING INDEX t1a (a=?)*/}
  1.6 {SELECT * FROM t1 WHERE a=101 AND b=56}
      {/*SEARCH TABLE t1 USING INDEX t1a (a=?)*/}
  2.1 {SELECT * FROM t1 WHERE a=100 AND b BETWEEN 50 AND 54}
      {/*SEARCH TABLE t1 USING INDEX t1b (b>? AND b<?)*/}
} {
  do_test $tn [list eqp $sql] $plan
}

# There are many more values of c between 0 and 100000 than there are
# between 800000 and 900000.  So t1c is more selective for the latter
# range.
# 
# Test 3.2 is a little unstable. It depends on the planner estimating
# that (b BETWEEN 30 AND 34) will match more rows than (c BETWEEN
# 800000 AND 900000). Which is a pretty close call (50 vs. 32), so
# the planner could get it wrong with an unlucky set of samples. This
# case happens to work, but others ("b BETWEEN 40 AND 44" for example) 
# will fail.
#
# Actual row counts for the three range constraints that tests 3.1-3.4
# combine: 50 rows for the b range, 376 for the low c range and 32 for
# the high c range.
do_execsql_test 3.0 {
  SELECT count(*) FROM t1 WHERE b BETWEEN 30 AND 34;
  SELECT count(*) FROM t1 WHERE c BETWEEN 0 AND 100000;
  SELECT count(*) FROM t1 WHERE c BETWEEN 800000 AND 900000;
} {50 376 32}

# Table-driven plan checks.  Each entry is:
#
#     test-id   SQL statement   expected-result pattern for do_test
#
# 3.1 and 3.2 pair the b range with the low and the high c range; 3.3 and
# 3.4 pair a=100 (one of the heavily duplicated values of a) with the same
# two c ranges.  In each pair the index on c is expected only for the
# high, more selective c range.
foreach {tn sql plan} {
  3.1 {SELECT * FROM t1 WHERE b BETWEEN 30 AND 34 AND c BETWEEN 0 AND 100000}
      {/*SEARCH TABLE t1 USING INDEX t1b (b>? AND b<?)*/}
  3.2 {SELECT * FROM t1
       WHERE b BETWEEN 30 AND 34 AND c BETWEEN 800000 AND 900000}
      {/*SEARCH TABLE t1 USING INDEX t1c (c>? AND c<?)*/}
  3.3 {SELECT * FROM t1 WHERE a=100 AND c BETWEEN 0 AND 100000}
      {/*SEARCH TABLE t1 USING INDEX t1a (a=?)*/}
  3.4 {SELECT * FROM t1
       WHERE a=100 AND c BETWEEN 800000 AND 900000}
      {/*SEARCH TABLE t1 USING INDEX t1c (c>? AND c<?)*/}
} {
  do_test $tn [list eqp $sql] $plan
}

finish_test
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`# 2011 August 13`
			`#`
			`# The author disclaims copyright to this source code. In place of`
			`# a legal notice, here is a blessing:`
			`#`
			`# May you do good and not evil.`
			`# May you find forgiveness for yourself and forgive others.`
			`# May you share freely, never taking more than you give.`
			`#`
			`#***********************************************************************`
			`#`
			`# This file implements tests for SQLite library. The focus of the tests`
Remove support for STAT3. The sqlite_stat3 tables are ignored, if they exist. STAT4 continues to work as it always has, and as it is a superset of STAT3 is the recommended replacement. FossilOrigin-Name: 1e17ea2fd1df4ad49138c787c8fe3207dd0c25c93f9001d52a9b69f8c12e841c 2019-08-08 18:24:17 +03:00			`# in this file is testing the capabilities of sqlite_stat4.`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`#`

			`set testdir [file dirname $argv0]`
			`source $testdir/tester.tcl`

Remove support for STAT3. The sqlite_stat3 tables are ignored, if they exist. STAT4 continues to work as it always has, and as it is a superset of STAT3 is the recommended replacement. FossilOrigin-Name: 1e17ea2fd1df4ad49138c787c8fe3207dd0c25c93f9001d52a9b69f8c12e841c 2019-08-08 18:24:17 +03:00			`ifcapable !stat4 {`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`finish_test`
			`return`
			`}`

			`set testprefix analyze8`

			`proc eqp {sql {db db}} {`
			`uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db`
			`}`

			`# Scenario:`
			`#`
			`# Two indices. One has mostly singleton entries, but for a few`
			`# values there are hundreds of entries. The other has 10-20`
			`# entries per value.`
			`#`
			`# Verify that the query planner chooses the first index for the singleton`
			`# entries and the second index for the others.`
			`#`
			`do_test 1.0 {`
			`db eval {`
			`CREATE TABLE t1(a,b,c,d);`
			`CREATE INDEX t1a ON t1(a);`
			`CREATE INDEX t1b ON t1(b);`
			`CREATE INDEX t1c ON t1(c);`
			`}`
			`for {set i 0} {$i<1000} {incr i} {`
			`if {$i%2==0} {set a $i} {set a [expr {($i%8)*100}]}`
			`set b [expr {$i/10}]`
			`set c [expr {$i/8}]`
			`set c [expr {$c$c$c}]`
			`db eval {INSERT INTO t1 VALUES($a,$b,$c,$i)}`
			`}`
			`db eval {ANALYZE}`
			`} {}`

			`# The a==100 comparison is expensive because there are many rows`
			`# with a==100. And so for those cases, choose the t1b index.`
			`#`
			`# Buf ro a==99 and a==101, there are far fewer rows so choose`
			`# the t1a index.`
			`#`
			`do_test 1.1 {`
			`eqp {SELECT * FROM t1 WHERE a=100 AND b=55}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1b (b=?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 1.2 {`
			`eqp {SELECT * FROM t1 WHERE a=99 AND b=55}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1a (a=?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 1.3 {`
			`eqp {SELECT * FROM t1 WHERE a=101 AND b=55}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1a (a=?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 1.4 {`
			`eqp {SELECT * FROM t1 WHERE a=100 AND b=56}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1b (b=?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 1.5 {`
			`eqp {SELECT * FROM t1 WHERE a=99 AND b=56}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1a (a=?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 1.6 {`
			`eqp {SELECT * FROM t1 WHERE a=101 AND b=56}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1a (a=?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 2.1 {`
			`eqp {SELECT * FROM t1 WHERE a=100 AND b BETWEEN 50 AND 54}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1b (b>? AND b<?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00
			`# There are many more values of c between 0 and 100000 than there are`
			`# between 800000 and 900000. So t1c is more selective for the latter`
			`# range.`
Fix a bug in using stat4 data to estimate the number of rows selected by a range constraint. FossilOrigin-Name: f783938ea999731ea073cd2c78e278095f7bea6d 2013-08-08 15:48:57 +04:00			`#`
			`# Test 3.2 is a little unstable. It depends on the planner estimating`
Change the query planner to do a better job of estimating the number rows selected by a BETWEEN operator using STAT4 when both upper and lower bounds are contained within the same sample. FossilOrigin-Name: 2d36be5d9a1cdd4fd2d54fc4eeece32a81cbacc1 2014-11-05 22:26:12 +03:00			`# that (b BETWEEN 30 AND 34) will match more rows than (c BETWEEN`
Fix a bug in using stat4 data to estimate the number of rows selected by a range constraint. FossilOrigin-Name: f783938ea999731ea073cd2c78e278095f7bea6d 2013-08-08 15:48:57 +04:00			`# 800000 AND 900000). Which is a pretty close call (50 vs. 32), so`
			`# the planner could get it wrong with an unlucky set of samples. This`
Change the way ANALYZE works to use a single cursor when scanning indices. FossilOrigin-Name: bdce612b35193abf72de1a563ea7962375b3574e 2013-08-14 23:54:12 +04:00			`# case happens to work, but others ("b BETWEEN 40 AND 44" for example)`
Fix a bug in using stat4 data to estimate the number of rows selected by a range constraint. FossilOrigin-Name: f783938ea999731ea073cd2c78e278095f7bea6d 2013-08-08 15:48:57 +04:00			`# will fail.`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`#`
Fix a bug in using stat4 data to estimate the number of rows selected by a range constraint. FossilOrigin-Name: f783938ea999731ea073cd2c78e278095f7bea6d 2013-08-08 15:48:57 +04:00			`do_execsql_test 3.0 {`
Change the query planner to do a better job of estimating the number rows selected by a BETWEEN operator using STAT4 when both upper and lower bounds are contained within the same sample. FossilOrigin-Name: 2d36be5d9a1cdd4fd2d54fc4eeece32a81cbacc1 2014-11-05 22:26:12 +03:00			`SELECT count(*) FROM t1 WHERE b BETWEEN 30 AND 34;`
Fix a bug in using stat4 data to estimate the number of rows selected by a range constraint. FossilOrigin-Name: f783938ea999731ea073cd2c78e278095f7bea6d 2013-08-08 15:48:57 +04:00			`SELECT count(*) FROM t1 WHERE c BETWEEN 0 AND 100000;`
			`SELECT count(*) FROM t1 WHERE c BETWEEN 800000 AND 900000;`
			`} {50 376 32}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 3.1 {`
Change the query planner to do a better job of estimating the number rows selected by a BETWEEN operator using STAT4 when both upper and lower bounds are contained within the same sample. FossilOrigin-Name: 2d36be5d9a1cdd4fd2d54fc4eeece32a81cbacc1 2014-11-05 22:26:12 +03:00			`eqp {SELECT * FROM t1 WHERE b BETWEEN 30 AND 34 AND c BETWEEN 0 AND 100000}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1b (b>? AND b<?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 3.2 {`
			`eqp {SELECT * FROM t1`
Change the query planner to do a better job of estimating the number rows selected by a BETWEEN operator using STAT4 when both upper and lower bounds are contained within the same sample. FossilOrigin-Name: 2d36be5d9a1cdd4fd2d54fc4eeece32a81cbacc1 2014-11-05 22:26:12 +03:00			`WHERE b BETWEEN 30 AND 34 AND c BETWEEN 800000 AND 900000}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1c (c>? AND c<?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 3.3 {`
			`eqp {SELECT * FROM t1 WHERE a=100 AND c BETWEEN 0 AND 100000}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1a (a=?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00			`do_test 3.4 {`
			`eqp {SELECT * FROM t1`
			`WHERE a=100 AND c BETWEEN 800000 AND 900000}`
More test case updates. Tests are all running now. FossilOrigin-Name: dab5e5294813891469660cceb211ac1a1e526715bb57dcdbb1ab90321e6a4dad 2018-05-02 22:42:33 +03:00			`} {/SEARCH TABLE t1 USING INDEX t1c (c>? AND c<?)/}`
Add the analyze8.test test module for sqlite_stat3. FossilOrigin-Name: 2c83ac89dc5a6017587defb541c9f3731b98892a 2011-08-16 05:15:12 +04:00
			`finish_test`