Changes to support fragmentation analysis in sqlite3_analyzer. (CVS 3634)

FossilOrigin-Name: bd6bc3b8f06919000fb082087dff7bbd335d07e9
This commit is contained in:
drh 2007-02-10 19:22:35 +00:00
parent ec41ddac37
commit 50c6706305
6 changed files with 283 additions and 17 deletions

View File

@ -1,5 +1,5 @@
C Explicit\scollations\salways\soverride\simplicit\scollations.\s\sThis\sis\nbackwards\scompatible\ssince\sSQLite\shas\snot\spreviously\ssupported\nexplicit\scollations.\sNeed\sto\sadd\stests\sof\sthis\snew\sbehavior.\s(CVS\s3633)
D 2007-02-07T13:09:46
C Changes\sto\ssupport\sfragmentation\sanalysis\sin\ssqlite3_analyzer.\s(CVS\s3634)
D 2007-02-10T19:22:36
F Makefile.in 7fa74bf4359aa899da5586e394d17735f221315f
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -57,7 +57,7 @@ F src/alter.c 2c79ec40f65e33deaf90ca493422c74586e481a3
F src/analyze.c 7d2b7ab9a9c2fd6e55700f69064dfdd3e36d7a8a
F src/attach.c b11eb4d5d3fb99a10a626956bccc7215f6b68b16
F src/auth.c 902f4722661c796b97f007d9606bd7529c02597f
F src/btree.c 51aef6a4b18df165b83b332befd1447c011b4389
F src/btree.c 4d4bef16fbf4f53ec3b161cfe5bb19bbc27a281d
F src/btree.h 066444ee25bd6e6accb997bfd2cf5ace14dbcd00
F src/build.c 6bd68dc730b01c1727738f8e4b5c730eb0ddb421
F src/callback.c 31d22b4919c7645cbcbb1591ce2453e8c677c558
@ -102,7 +102,7 @@ F src/table.c 6d0da66dde26ee75614ed8f584a1996467088d06
F src/tclsqlite.c cd2b3b86ab07c0e0779f6c6e71e72c6c7dc1e704
F src/test1.c cb314bfa3e9251b545fa3669ec80a8c8a0a86310
F src/test2.c ca74a1d8aeb7d9606e8f6b762c5daf85c1a3f92b
F src/test3.c 875126eab6749f9d9e2b60b6ee6a65825b3d1fed
F src/test3.c ed494a126221c4b9f66f8f0445554ad749764709
F src/test4.c 8b784cd82de158a2317cb4ac4bc86f91ad315e25
F src/test5.c 7162f8526affb771c4ed256826eee7bb9eca265f
F src/test6.c 60a02961ceb7b3edc25f5dc5c1ac2556622a76de
@ -360,6 +360,7 @@ F test/where2.test 61d5b20d9bedc8788a773bbdc5b2ef887725928e
F test/where3.test 0a30fe9808b0fa01c46d0fcf4fac0bf6cf75bb30
F test/where4.test 3fcf53c5ea7af1db3980b3293c2a45b56605f26a
F tool/diffdb.c 7524b1b5df217c20cd0431f6789851a4e0cb191b
F tool/fragck.tcl 5265a95126abcf6ab357f7efa544787e5963f439
F tool/lemon.c 2938bec507110397c937bd8a03b0c9596a709a04
F tool/lempar.c fdc1672e97f72f72e76553038501da40fec9d251
F tool/memleak.awk 4e7690a51bf3ed757e611273d43fe3f65b510133
@ -373,7 +374,7 @@ F tool/report1.txt 9eae07f26a8fc53889b45fc833a66a33daa22816
F tool/showdb.c a086a3d788c7a23cb008317c3180ceb19f20bce0
F tool/showjournal.c ec3b171be148656827c4949fbfb8ab4370822f87
F tool/space_used.tcl f714c41a59e326b8b9042f415b628b561bafa06b
F tool/spaceanal.tcl f4ca4843c137db16124d680523f466044d5f0ba2
F tool/spaceanal.tcl 2614b0c929852547138d608f58535f33a49d27ec
F tool/speedtest.tcl 06c76698485ccf597b9e7dbb1ac70706eb873355
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F www/arch.fig d5f9752a4dbf242e9cfffffd3f5762b6c63b3bcf
@ -431,7 +432,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
P 2bd4b62a20219f939ac2ac22440dc7fc0449f766
R 7101481de1358da4b073980f7790ac38
P 3638823a629164e4158f76d03ff2cea1eab34e9d
R 51a9ab370b25f99fa741aa9c670f3eea
U drh
Z 02cb54f9b44027c71a4480afa634e551
Z bcbf10610bf7092c876385c4549956cc

View File

@ -1 +1 @@
3638823a629164e4158f76d03ff2cea1eab34e9d
bd6bc3b8f06919000fb082087dff7bbd335d07e9

View File

@ -9,7 +9,7 @@
** May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.334 2007/01/27 02:24:55 drh Exp $
** $Id: btree.c,v 1.335 2007/02/10 19:22:36 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
@ -5890,6 +5890,7 @@ int sqlite3BtreePageDump(Btree *p, int pgno, int recursive){
** aResult[7] = Header size in bytes
** aResult[8] = Local payload size
** aResult[9] = Parent page number
** aResult[10]= Page number of the first overflow page
**
** This routine is used for testing and debugging only.
*/
@ -5938,6 +5939,11 @@ int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult, int upCnt){
}else{
aResult[9] = pPage->pParent->pgno;
}
if( tmpCur.info.iOverflow ){
aResult[10] = get4byte(&tmpCur.info.pCell[tmpCur.info.iOverflow]);
}else{
aResult[10] = 0;
}
releaseTempCursor(&tmpCur);
return SQLITE_OK;
}

View File

@ -13,7 +13,7 @@
** is not included in the SQLite library. It is used for automated
** testing of the SQLite library.
**
** $Id: test3.c,v 1.69 2007/01/27 02:24:56 drh Exp $
** $Id: test3.c,v 1.70 2007/02/10 19:22:36 drh Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
@ -577,7 +577,7 @@ static int btree_integrity_check(
}
pBt = sqlite3TextToPtr(argv[1]);
nRoot = argc-2;
aRoot = malloc( sizeof(int)*(argc-2) );
aRoot = (int*)malloc( sizeof(int)*(argc-2) );
for(i=0; i<argc-2; i++){
if( Tcl_GetInt(interp, argv[i+2], &aRoot[i]) ) return TCL_ERROR;
}
@ -586,7 +586,7 @@ static int btree_integrity_check(
#else
zResult = 0;
#endif
free(aRoot);
free((void*)aRoot);
if( zResult ){
Tcl_AppendResult(interp, zResult, 0);
sqliteFree(zResult);
@ -1186,6 +1186,7 @@ static int btree_payload_size(
** aResult[7] = Header size in bytes
** aResult[8] = Local payload size
** aResult[9] = Parent page number
** aResult[10]= Page number of the first overflow page
*/
static int btree_cursor_info(
void *NotUsed,
@ -1197,7 +1198,7 @@ static int btree_cursor_info(
int rc;
int i, j;
int up;
int aResult[10];
int aResult[11];
char zBuf[400];
if( argc!=2 && argc!=3 ){
@ -1225,6 +1226,76 @@ static int btree_cursor_info(
return SQLITE_OK;
}
/*
** Copied from btree.c:
*/
static u32 get4byte(unsigned char *p){
return (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
}
/*
** Usage:   btree_ovfl_info  BTREE  CURSOR
**
** Given a cursor, return the sequence of page numbers that form the
** overflow chain for the data of the entry that the cursor is pointing
** to.  The result is a TCL list with one element per overflow page, in
** chain order.  The list is empty if the entry has no overflow pages.
**
** The walk starts at the first overflow page reported by
** sqlite3BtreeCursorInfo() and follows the 4-byte next-page number found
** at the start of each overflow page.  It stops when that number is zero
** or after visiting as many pages as the overflow payload can occupy,
** whichever comes first.
*/
static int btree_ovfl_info(
  void *NotUsed,
  Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
  int argc,              /* Number of arguments */
  const char **argv      /* Text of each argument */
){
  Btree *pBt;            /* Btree handle decoded from argv[1] */
  BtCursor *pCur;        /* Cursor decoded from argv[2] */
  Pager *pPager;         /* Pager used to fetch the overflow pages */
  int rc;                /* Result code from sqlite3BtreeCursorInfo() */
  int n;                 /* Upper bound on the overflow pages left to visit */
  int ovflSize;          /* Payload bytes carried by one overflow page */
  u32 pgno;              /* Current overflow page.  0 ends the chain */
  void *pPage;           /* Content of the current overflow page */
  int aResult[11];       /* Output of sqlite3BtreeCursorInfo() */
  char zElem[100];       /* Text rendering of pgno */
  Tcl_DString str;       /* Accumulates the list of page numbers */

  if( argc!=3 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
                     " BTREE CURSOR\"", (char*)0);
    return TCL_ERROR;
  }
  pBt = sqlite3TextToPtr(argv[1]);
  pCur = sqlite3TextToPtr(argv[2]);
  if( (*(void**)pCur) != (void*)pBt ){
    Tcl_AppendResult(interp, "Cursor ", argv[2], " does not belong to btree ",
                     argv[1], (char*)0);
    return TCL_ERROR;
  }
  pPager = sqlite3BtreePager(pBt);
  rc = sqlite3BtreeCursorInfo(pCur, aResult, 0);
  if( rc ){
    Tcl_AppendResult(interp, errorName(rc), (char*)0);
    return TCL_ERROR;
  }

  /* The first 4 bytes of every overflow page hold the number of the next
  ** page in the chain, so only the rest of the usable area carries
  ** payload.  Dividing by the full usable size would make the bound too
  ** small for long chains and cut the reported list short.
  */
  ovflSize = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt) - 4;

  /* aResult[6] is the total payload size and aResult[8] the part stored
  ** locally; the difference is what spills onto overflow pages. */
  n = aResult[6] - aResult[8];
  n = (n + ovflSize - 1)/ovflSize;

  Tcl_DStringInit(&str);
  pgno = (u32)aResult[10];
  while( pgno && n-- ){
    sprintf(zElem, "%u", pgno);
    Tcl_DStringAppendElement(&str, zElem);
    if( sqlite3pager_get(pPager, pgno, &pPage)!=SQLITE_OK ){
      Tcl_DStringFree(&str);
      Tcl_AppendResult(interp, "unable to get page ", zElem, (char*)0);
      return TCL_ERROR;
    }
    pgno = get4byte((unsigned char*)pPage);
    sqlite3pager_unref(pPage);
  }
  Tcl_DStringResult(interp, &str);
  return TCL_OK;
}
/*
** The command is provided for the purpose of setting breakpoints.
** in regression test scripts.
@ -1440,6 +1511,7 @@ int Sqlitetest3_Init(Tcl_Interp *interp){
{ "btree_from_db", (Tcl_CmdProc*)btree_from_db },
{ "btree_set_cache_size", (Tcl_CmdProc*)btree_set_cache_size },
{ "btree_cursor_info", (Tcl_CmdProc*)btree_cursor_info },
{ "btree_ovfl_info", (Tcl_CmdProc*)btree_ovfl_info },
{ "btree_cursor_list", (Tcl_CmdProc*)btree_cursor_list },
};
int i;

149
tool/fragck.tcl Normal file
View File

@ -0,0 +1,149 @@
# Run this TCL script using "testfixture" to get a report that shows
# the sequence of database pages used by a particular table or index.
# This information is used for fragmentation analysis.
#
# Validate the command line.  Exactly two arguments are required: the
# database file and the name of the table or index to examine.
#
if {[llength $argv]!=2} {
  puts stderr "Usage: $argv0 database-name table-or-index-name"
  exit 1
}
foreach {file_to_analyze objname} $argv break

# Reject files that cannot be a usable database.  The checks run in
# order and the first one that fails terminates the script.
#
set complaint {}
if {![file exists $file_to_analyze]} {
  set complaint "No such file: $file_to_analyze"
} elseif {![file readable $file_to_analyze]} {
  set complaint "File is not readable: $file_to_analyze"
} elseif {[file size $file_to_analyze]<512} {
  set complaint "Empty or malformed database: $file_to_analyze"
}
if {[string length $complaint]} {
  puts stderr $complaint
  exit 1
}

# Open the database twice: once through the SQL interface (command "db")
# and once through the btree test interface (handle kept in $DB).
#
sqlite3 db $file_to_analyze
set DB [btree_open $file_to_analyze 1000 0]
# Wrapper around the [btree_cursor_info] command.
#
#   arrayvar   Name of an array variable in the caller's scope that
#              receives the results.
#   csr        An open btree cursor, as returned by [btree_cursor].
#   up         Optional, default 0.  When non-zero, the information
#              describes the page $up levels above the cursor's page in
#              the tree ($up==1 is the parent, $up==2 the grandparent,
#              and so on).
#
# The values returned by [btree_cursor_info] are stored, in order, in
# these array elements:
#
#   $arrayvar(page_no)             = The page number
#   $arrayvar(entry_no)            = The entry number
#   $arrayvar(page_entries)        = Total number of entries on this page
#   $arrayvar(cell_size)           = Cell size (local payload + header)
#   $arrayvar(page_freebytes)      = Number of free bytes on this page
#   $arrayvar(page_freeblocks)     = Number of free blocks on the page
#   $arrayvar(payload_bytes)       = Total payload size (local + overflow)
#   $arrayvar(header_bytes)        = Header size in bytes
#   $arrayvar(local_payload_bytes) = Local payload size
#   $arrayvar(parent)              = Parent page number
#   $arrayvar(first_ovfl)          = Page number of the first overflow page
#
proc cursor_info {arrayvar csr {up 0}} {
  upvar $arrayvar a

  # Build the list of destination variables a(<field>) in result order.
  set targets {}
  foreach field {
    page_no entry_no page_entries cell_size page_freebytes
    page_freeblocks payload_bytes header_bytes local_payload_bytes
    parent first_ovfl
  } {
    lappend targets a($field)
  }

  # A single pass of foreach assigns every field; "break" stops it there.
  foreach $targets [btree_cursor_info $csr $up] break
}
# Record the page size of the database in the global variable pageSize.
#
set pageSize [db eval {PRAGMA page_size}]

# Locate the root page of the object to analyze and note whether it is
# a table or an index.  sqlite_master is special-cased: its root is
# always page 1.  If the lookup finds no row, rootpage stays unset.
#
if {$objname eq "sqlite_master"} {
  foreach {rootpage type} {1 table} break
} else {
  foreach {rootpage type} [db eval {
    SELECT rootpage, type FROM sqlite_master
    WHERE name=$objname
  }] break
  if {![info exists rootpage]} {
    puts stderr "no such table or index: $objname"
    exit 1
  }
  if {[lsearch -exact {table index} $type]<0} {
    puts stderr "$objname is something other than a table or index"
    exit 1
  }
  if {![string is integer -strict $rootpage]} {
    puts stderr "invalid root page for $objname: $rootpage"
    exit 1
  }
}
# The cursor $csr points at an entry.  Print one line describing the
# page that lies $up levels above the page holding that entry; with
# $up==0 that is the page holding the entry itself.
#
# Pages are remembered in the global array "seen", so a page that has
# already been reported is skipped.  Ancestors are reported before the
# page itself, and each line shows the depth of the page in the tree
# (the root is level 1).
#
proc page_info {csr up} {
  global seen
  cursor_info pginfo $csr $up
  set pgno $pginfo(page_no)
  if {[info exists seen($pgno)]} {
    return
  }
  set seen($pgno) 1

  # Report the ancestors first so that output runs from the root down.
  #
  if {$pginfo(parent)} {
    page_info $csr [expr {$up+1}]
  }

  # Count the levels between this page and the root by walking upward
  # until a page with no parent is reached.
  #
  set depth 1
  for {set lvl [expr {$up+1}]} {$pginfo(parent)} {incr lvl} {
    incr depth
    cursor_info pginfo $csr $lvl
  }

  puts [format {LEVEL %d: %6d} $depth $pgno]
}
# Visit every entry of the object in cursor order.  For each entry,
# report the btree pages leading to it that have not been reported yet,
# followed by the entry's overflow pages numbered from 1.
#
set csr [btree_cursor $DB $rootpage 0]
btree_first $csr
while {![btree_eof $csr]} {
  page_info $csr 0
  set ovflIdx 0
  foreach pg [btree_ovfl_info $DB $csr] {
    incr ovflIdx
    puts [format {OVFL %3d: %6d} $ovflIdx $pg]
  }
  btree_next $csr
}
exit 0

View File

@ -26,6 +26,10 @@ if {[file size $file_to_analyze]<512} {
exit 1
}
# Maximum distance between pages before we consider it a "gap"
#
set MAXGAP 3
# Open the database
#
sqlite3 db [lindex $argv 0]
@ -53,7 +57,8 @@ set tabledef\
ovfl_pages int, -- Number of overflow pages used
int_unused int, -- Number of unused bytes on interior pages
leaf_unused int, -- Number of unused bytes on primary pages
ovfl_unused int -- Number of unused bytes on overflow pages
ovfl_unused int, -- Number of unused bytes on overflow pages
gap_cnt int -- Number of gaps in the page layout
);}
mem eval $tabledef
@ -105,7 +110,8 @@ proc cursor_info {arrayvar csr {up 0}} {
a(payload_bytes) \
a(header_bytes) \
a(local_payload_bytes) \
a(parent) ] [btree_cursor_info $csr $up] {}
a(parent) \
a(first_ovfl) ] [btree_cursor_info $csr $up] break
}
# Determine the page-size of the database. This global variable is used
@ -145,6 +151,8 @@ foreach {name rootpage} [db eval $sql] {
set ovfl_pages $wideZero ;# Number of overflow pages used
set leaf_pages $wideZero ;# Number of leaf pages
set int_pages $wideZero ;# Number of interior pages
set gap_cnt 0 ;# Number of holes in the page sequence
set prev_pgno 0 ;# Last page number seen
# As the btree is traversed, the array variable $seen($pgno) is set to 1
# the first time page $pgno is encountered.
@ -180,6 +188,9 @@ foreach {name rootpage} [db eval $sql] {
set n [expr {int(ceil($ovfl/($pageSize-4.0)))}]
incr ovfl_pages $n
incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}]
set pglist [btree_ovfl_info $DB $csr]
} else {
set pglist {}
}
# If this is the first table entry analyzed for the page, then update
@ -191,6 +202,7 @@ foreach {name rootpage} [db eval $sql] {
set seen($ci(page_no)) 1
incr leaf_pages
incr unused_leaf $ci(page_freebytes)
set pglist "$ci(page_no) $pglist"
# Now check if the page has a parent that has not been analyzed. If
# so, update the $int_pages, $cnt_int_entry and $unused_int statistics
@ -210,8 +222,20 @@ foreach {name rootpage} [db eval $sql] {
incr int_pages
incr cnt_int_entry $ci(page_entries)
incr unused_int $ci(page_freebytes)
# parent pages come before their first child
set pglist "$ci(page_no) $pglist"
}
}
# Check the page list for fragmentation
#
foreach pg $pglist {
if {($pg<$prev_pgno || $pg>$prev_pgno+$MAXGAP) && $prev_pgno>0} {
incr gap_cnt
}
set prev_pgno $pg
}
}
btree_close_cursor $csr
@ -250,6 +274,7 @@ foreach {name rootpage} [db eval $sql] {
append sql ",$unused_int"
append sql ",$unused_leaf"
append sql ",$unused_ovfl"
append sql ",$gap_cnt"
append sql );
mem eval $sql
}
@ -279,6 +304,8 @@ foreach {name tbl_name rootpage} [db eval $sql] {
set mx_payload $wideZero ;# Maximum payload size
set ovfl_pages $wideZero ;# Number of overflow pages used
set leaf_pages $wideZero ;# Number of leaf pages
set gap_cnt 0 ;# Number of holes in the page sequence
set prev_pgno 0 ;# Last page number seen
# As the btree is traversed, the array variable $seen($pgno) is set to 1
# the first time page $pgno is encountered.
@ -324,6 +351,11 @@ foreach {name tbl_name rootpage} [db eval $sql] {
set seen($ci(page_no)) 1
incr leaf_pages
incr unused_leaf $ci(page_freebytes)
set pg $ci(page_no)
if {$prev_pgno>0 && ($prev_pgno<$pg-$MAXGAP || $prev_pgno>$pg)} {
incr gap_cnt
}
set prev_pgno $ci(page_no)
}
}
btree_close_cursor $csr
@ -355,6 +387,7 @@ foreach {name tbl_name rootpage} [db eval $sql] {
append sql ",0"
append sql ",$unused_leaf"
append sql ",$unused_ovfl"
append sql ",$gap_cnt"
append sql );
mem eval $sql
}
@ -420,7 +453,8 @@ proc subreport {title where} {
int(sum(ovfl_pages)) AS ovfl_pages,
int(sum(leaf_unused)) AS leaf_unused,
int(sum(int_unused)) AS int_unused,
int(sum(ovfl_unused)) AS ovfl_unused
int(sum(ovfl_unused)) AS ovfl_unused,
int(sum(gap_cnt)) AS gap_cnt
FROM space_used WHERE $where" {} {}
# Output the sub-report title, nicely decorated with * characters.
@ -450,6 +484,7 @@ proc subreport {title where} {
set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
set avg_payload [divide $payload $nleaf]
set avg_unused [divide $total_unused $nleaf]
set fragmentation [percent $gap_cnt $total_pages {fragmentation}]
if {$int_pages>0} {
# TODO: Is this formula correct?
set nTab [mem eval "
@ -476,6 +511,9 @@ proc subreport {title where} {
if {[info exists avg_fanout]} {
statline {Average fanout} $avg_fanout
}
if {$total_pages>1} {
statline {Fragmentation} $fragmentation
}
statline {Maximum payload per entry} $mx_payload
statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
if {$int_pages>0} {